diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8042661533e1d..1e31265216e3e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -37,18 +37,17 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1") set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() -# Enable using a custom GCC toolchain to build Arrow -if (NOT "$ENV{ARROW_GCC_ROOT}" STREQUAL "") - set(GCC_ROOT $ENV{ARROW_GCC_ROOT}) - set(CMAKE_C_COMPILER ${GCC_ROOT}/bin/gcc) - set(CMAKE_CXX_COMPILER ${GCC_ROOT}/bin/g++) -endif() - if(APPLE) # In newer versions of CMake, this is the default setting set(CMAKE_MACOSX_RPATH 1) endif() +find_program(CCACHE_FOUND ccache) +if(CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +endif(CCACHE_FOUND) + # ---------------------------------------------------------------------- # cmake options @@ -126,38 +125,16 @@ endif () # Add common flags set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}") -# Required to avoid static linking errors with dependencies -add_definitions(-fPIC) - # Determine compiler version include(CompilerInfo) if ("${COMPILER_FAMILY}" STREQUAL "clang") - # Clang helpfully provides a few extensions from C++11 such as the 'override' - # keyword on methods. This doesn't change behavior, and we selectively enable - # it in src/gutil/port.h only on clang. So, we can safely use it, and don't want - # to trigger warnings when we do so. - # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-extensions") - # Using Clang with ccache causes a bunch of spurious warnings that are # purportedly fixed in the next version of ccache. See the following for details: # # http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html # http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") - - # Only hardcode -fcolor-diagnostics if stderr is opened on a terminal. Otherwise - # the color codes show up as noisy artifacts. - # - # This test is imperfect because 'cmake' and 'make' can be run independently - # (with different terminal options), and we're testing during the former. - execute_process(COMMAND test -t 2 RESULT_VARIABLE ARROW_IS_TTY) - if ((${ARROW_IS_TTY} EQUAL 0) AND (NOT ("$ENV{TERM}" STREQUAL "dumb"))) - message("Running in a controlling terminal") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics") - else() - message("Running without a controlling terminal or in a dumb terminal") - endif() endif() # Sanity check linking option. @@ -278,12 +255,6 @@ set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}") set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}") include_directories(src) -############################################################ -# Visibility -############################################################ -# For generate_export_header() and add_compiler_export_flags(). -include(GenerateExportHeader) - ############################################################ # Testing ############################################################ @@ -456,21 +427,33 @@ endif() # Subdirectories ############################################################ -add_subdirectory(src/arrow) -add_subdirectory(src/arrow/util) -add_subdirectory(src/arrow/table) -add_subdirectory(src/arrow/types) - -set(LINK_LIBS - arrow_util - arrow_table - arrow_types) +set(LIBARROW_LINK_LIBS +) set(ARROW_SRCS src/arrow/array.cc src/arrow/builder.cc src/arrow/field.cc src/arrow/type.cc + + src/arrow/table/column.cc + src/arrow/table/schema.cc + src/arrow/table/table.cc + + src/arrow/types/construct.cc + src/arrow/types/floating.cc + src/arrow/types/integer.cc + src/arrow/types/json.cc + src/arrow/types/list.cc + src/arrow/types/primitive.cc + src/arrow/types/string.cc + src/arrow/types/struct.cc + src/arrow/types/union.cc + + src/arrow/util/bit-util.cc + src/arrow/util/buffer.cc + src/arrow/util/memory-pool.cc + src/arrow/util/status.cc ) set(LIBARROW_LINKAGE "SHARED") @@ -479,8 +462,15 @@ add_library(arrow ${LIBARROW_LINKAGE} ${ARROW_SRCS} ) -target_link_libraries(arrow ${LINK_LIBS}) -set_target_properties(arrow PROPERTIES LINKER_LANGUAGE CXX) +set_target_properties(arrow + PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}") +target_link_libraries(arrow ${LIBARROW_LINK_LIBS}) + +add_subdirectory(src/arrow) +add_subdirectory(src/arrow/util) +add_subdirectory(src/arrow/table) +add_subdirectory(src/arrow/types) install(TARGETS arrow LIBRARY DESTINATION lib diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 899e8aae19c0e..377c8a4a257c4 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -15,7 +15,25 @@ // specific language governing permissions and limitations // under the License. +// Coarse public API while the library is in development + #ifndef ARROW_API_H #define ARROW_API_H +#include "arrow/array.h" +#include "arrow/builder.h" +#include "arrow/field.h" +#include "arrow/type.h" + +#include "arrow/table/column.h" +#include "arrow/table/schema.h" +#include "arrow/table/table.h" + +#include "arrow/types/boolean.h" +#include "arrow/types/floating.h" +#include "arrow/types/integer.h" +#include "arrow/types/list.h" +#include "arrow/types/string.h" +#include "arrow/types/struct.h" + #endif // ARROW_API_H diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt index 68bf3148a9889..26d843d853bfb 100644 --- a/cpp/src/arrow/table/CMakeLists.txt +++ b/cpp/src/arrow/table/CMakeLists.txt @@ -19,21 +19,6 @@ # arrow_table ####################################### -set(TABLE_SRCS - column.cc - schema.cc - table.cc -) - -set(TABLE_LIBS -) - -add_library(arrow_table STATIC - ${TABLE_SRCS} -) -target_link_libraries(arrow_table ${TABLE_LIBS}) -SET_TARGET_PROPERTIES(arrow_table PROPERTIES LINKER_LANGUAGE CXX) - # Headers: top level install(FILES column.h diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc index 0cf1b3c5f9a8e..5b5393efecce9 100644 --- a/cpp/src/arrow/table/schema-test.cc +++ b/cpp/src/arrow/table/schema-test.cc @@ -97,10 +97,10 @@ TEST_F(TestSchema, ToString) { auto schema = std::make_shared(fields); std::string result = schema->ToString(); - std::string expected = R"(f0 ?int32 -f1 uint8 -f2 ?string -f3 ?list + std::string expected = R"(f0 int32 +f1 uint8 not null +f2 string +f3 list )"; ASSERT_EQ(expected, result); diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index ff145e2c1e3b4..d8d2a4e98c16e 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -19,6 +19,7 @@ namespace arrow { +const std::shared_ptr NA = std::make_shared(); const std::shared_ptr BOOL = std::make_shared(); const std::shared_ptr UINT8 = std::make_shared(); const std::shared_ptr UINT16 = std::make_shared(); diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 4193a0e8bc851..264f08269f5a0 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -71,49 +71,46 @@ struct LogicalType { UINT64 = 7, INT64 = 8, - // A boolean value represented as 1 byte - BOOL = 9, - // A boolean value represented as 1 bit - BIT = 10, + BOOL = 9, // 4-byte floating point value - FLOAT = 11, + FLOAT = 10, // 8-byte floating point value - DOUBLE = 12, + DOUBLE = 11, // CHAR(N): fixed-length UTF8 string with length N - CHAR = 13, + CHAR = 12, // UTF8 variable-length string as List - STRING = 14, + STRING = 13, // VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1) - VARCHAR = 15, + VARCHAR = 14, // Variable-length bytes (no guarantee of UTF8-ness) - BINARY = 16, + BINARY = 15, // By default, int32 days since the UNIX epoch - DATE = 17, + DATE = 16, // Exact timestamp encoded with int64 since UNIX epoch // Default unit millisecond - TIMESTAMP = 18, + TIMESTAMP = 17, // Timestamp as double seconds since the UNIX epoch - TIMESTAMP_DOUBLE = 19, + TIMESTAMP_DOUBLE = 18, // Exact time encoded with int64, default unit millisecond - TIME = 20, + TIME = 19, // Precision- and scale-based decimal type. Storage type depends on the // parameters. - DECIMAL = 21, + DECIMAL = 20, // Decimal value encoded as a text string - DECIMAL_TEXT = 22, + DECIMAL_TEXT = 21, // A list of some logical data type LIST = 30, @@ -141,7 +138,9 @@ struct DataType { type(type), nullable(nullable) {} - virtual bool Equals(const DataType* other) { + virtual ~DataType() {} + + bool Equals(const DataType* other) { // Call with a pointer so more friendly to subclasses return this == other || (this->type == other->type && this->nullable == other->nullable); @@ -184,11 +183,10 @@ struct PrimitiveType : public DataType { : DataType(Derived::type_enum, nullable) {} virtual std::string ToString() const { - std::string result; - if (nullable) { - result.append("?"); + std::string result(static_cast(this)->name()); + if (!nullable) { + result.append(" not null"); } - result.append(static_cast(this)->name()); return result; } }; @@ -205,6 +203,10 @@ struct PrimitiveType : public DataType { return NAME; \ } +struct NullType : public PrimitiveType { + PRIMITIVE_DECL(NullType, void, NA, 0, "null"); +}; + struct BooleanType : public PrimitiveType { PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool"); }; @@ -249,6 +251,7 @@ struct DoubleType : public PrimitiveType { PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double"); }; +extern const std::shared_ptr NA; extern const std::shared_ptr BOOL; extern const std::shared_ptr UINT8; extern const std::shared_ptr UINT16; diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt index e090aead1f8b9..bae4b6235ff1d 100644 --- a/cpp/src/arrow/types/CMakeLists.txt +++ b/cpp/src/arrow/types/CMakeLists.txt @@ -19,27 +19,6 @@ # arrow_types ####################################### -set(TYPES_SRCS - construct.cc - floating.cc - integer.cc - json.cc - list.cc - primitive.cc - string.cc - struct.cc - union.cc -) - -set(TYPES_LIBS -) - -add_library(arrow_types STATIC - ${TYPES_SRCS} -) -target_link_libraries(arrow_types ${TYPES_LIBS}) -SET_TARGET_PROPERTIES(arrow_types PROPERTIES LINKER_LANGUAGE CXX) - # Headers: top level install(FILES boolean.h diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc index b29b95715fef6..168e370d51a14 100644 --- a/cpp/src/arrow/types/json.cc +++ b/cpp/src/arrow/types/json.cc @@ -19,10 +19,7 @@ #include -#include "arrow/types/boolean.h" -#include "arrow/types/integer.h" -#include "arrow/types/floating.h" -#include "arrow/types/null.h" +#include "arrow/type.h" #include "arrow/types/string.h" #include "arrow/types/union.h" diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index b4bbd2841a89d..cec1399514265 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -47,17 +47,18 @@ TEST(TypesTest, TestListType) { ASSERT_EQ(list_type.type, LogicalType::LIST); ASSERT_EQ(list_type.name(), string("list")); - ASSERT_EQ(list_type.ToString(), string("?list")); + ASSERT_EQ(list_type.ToString(), string("list")); ASSERT_EQ(list_type.value_type->type, vt->type); ASSERT_EQ(list_type.value_type->type, vt->type); std::shared_ptr st = std::make_shared(false); std::shared_ptr lt = std::make_shared(st, false); - ASSERT_EQ(lt->ToString(), string("list")); + ASSERT_EQ(lt->ToString(), string("list not null")); ListType lt2(lt, false); - ASSERT_EQ(lt2.ToString(), string("list>")); + ASSERT_EQ(lt2.ToString(), + string("list not null> not null")); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 577d71d0b2892..5a47ce3187c2b 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -24,10 +24,10 @@ namespace arrow { std::string ListType::ToString() const { std::stringstream s; - if (this->nullable) { - s << "?"; - } s << "list<" << value_type->ToString() << ">"; + if (!this->nullable) { + s << " not null"; + } return s.str(); } diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index f39fe5c4d811b..0539ac87e0c88 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -43,6 +43,7 @@ struct ListType : public DataType { explicit ListType(const TypePtr& value_type, bool nullable = true) : DataType(LogicalType::LIST, nullable), value_type(value_type) {} + virtual ~ListType() {} static char const *name() { return "list"; diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h index 5795cfed577c5..7daf62fadf5cf 100644 --- a/cpp/src/arrow/types/string.h +++ b/cpp/src/arrow/types/string.h @@ -84,11 +84,10 @@ struct StringType : public DataType { } virtual std::string ToString() const { - std::string result; - if (nullable) { - result.append("?"); + std::string result(name()); + if (!nullable) { + result.append(" not null"); } - result.append(name()); return result; } }; diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc index df6157104795e..1a9fc6be4a5ce 100644 --- a/cpp/src/arrow/types/struct-test.cc +++ b/cpp/src/arrow/types/struct-test.cc @@ -49,7 +49,7 @@ TEST(TestStructType, Basics) { ASSERT_TRUE(struct_type.field(1).Equals(f1)); ASSERT_TRUE(struct_type.field(2).Equals(f2)); - ASSERT_EQ(struct_type.ToString(), "?struct"); + ASSERT_EQ(struct_type.ToString(), "struct"); // TODO: out of bounds for field(...) } diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc index 6b233bc372af1..ee2a41c56c938 100644 --- a/cpp/src/arrow/types/struct.cc +++ b/cpp/src/arrow/types/struct.cc @@ -26,7 +26,6 @@ namespace arrow { std::string StructType::ToString() const { std::stringstream s; - if (nullable) s << "?"; s << "struct<"; for (size_t i = 0; i < fields_.size(); ++i) { if (i > 0) s << ", "; @@ -34,6 +33,7 @@ std::string StructType::ToString() const { s << field.name << ": " << field.type->ToString(); } s << ">"; + if (!nullable) s << " not null"; return s.str(); } diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index c53f307c9f59a..4272ce4285482 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -19,22 +19,6 @@ # arrow_util ####################################### -set(UTIL_SRCS - bit-util.cc - buffer.cc - memory-pool.cc - status.cc -) - -set(UTIL_LIBS -) - -add_library(arrow_util STATIC - ${UTIL_SRCS} -) -target_link_libraries(arrow_util ${UTIL_LIBS}) -SET_TARGET_PROPERTIES(arrow_util PROPERTIES LINKER_LANGUAGE CXX) - # Headers: top level install(FILES bit-util.h @@ -50,7 +34,7 @@ install(FILES add_library(arrow_test_util) target_link_libraries(arrow_test_util - arrow_util) +) SET_TARGET_PROPERTIES(arrow_test_util PROPERTIES LINKER_LANGUAGE CXX) @@ -64,7 +48,6 @@ add_library(arrow_test_main if (APPLE) target_link_libraries(arrow_test_main gtest - arrow_util arrow_test_util dl) set_target_properties(arrow_test_main @@ -72,7 +55,6 @@ if (APPLE) else() target_link_libraries(arrow_test_main gtest - arrow_util arrow_test_util pthread dl diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index df55bfac9eb4a..2ea79042bc5e3 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -45,6 +45,12 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1") set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() +find_program(CCACHE_FOUND ccache) +if(CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +endif(CCACHE_FOUND) + ############################################################ # Compiler flags ############################################################ @@ -389,6 +395,7 @@ add_subdirectory(src/pyarrow) add_subdirectory(src/pyarrow/util) set(PYARROW_SRCS + src/pyarrow/helpers.cc src/pyarrow/init.cc ) @@ -410,11 +417,12 @@ endif() # Setup and build Cython modules ############################################################ -foreach(pyx_api_file - arrow/config.pyx - arrow/parquet.pyx) - set_source_files_properties(${pyx_api_file} PROPERTIES CYTHON_API 1) -endforeach(pyx_api_file) +# foreach(pyx_api_file +# arrow/config.pyx +# arrow/parquet.pyx +# arrow/schema.pyx) +# set_source_files_properties(${pyx_api_file} PROPERTIES CYTHON_API 1) +# endforeach(pyx_api_file) set(USE_RELATIVE_RPATH ON) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) @@ -422,6 +430,7 @@ set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) set(CYTHON_EXTENSIONS config parquet + schema ) foreach(module ${CYTHON_EXTENSIONS}) diff --git a/python/arrow/array.pxd b/python/arrow/array.pxd new file mode 100644 index 0000000000000..07e7fd335b3d6 --- /dev/null +++ b/python/arrow/array.pxd @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from arrow.includes.common cimport shared_ptr +from arrow.includes.arrow cimport CArray + +cdef class Array: + cdef: + shared_ptr[CArray] sp_array + CArray* array diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx new file mode 100644 index 0000000000000..1f05493f84fb3 --- /dev/null +++ b/python/arrow/array.pyx @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True + +from arrow.compat import frombytes, tobytes +from arrow.includes.arrow cimport * + + +def from_list(list_obj, type=None): + """ + Convert Python list to Arrow array + """ + pass diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd index 3635ceb868596..8b71f22cc0869 100644 --- a/python/arrow/includes/arrow.pxd +++ b/python/arrow/includes/arrow.pxd @@ -20,4 +20,63 @@ from arrow.includes.common cimport * cdef extern from "arrow/api.h" namespace "arrow" nogil: - pass + + enum LogicalType" arrow::LogicalType::type": + LogicalType_BOOL" arrow::LogicalType::BOOL" + + LogicalType_UINT8" arrow::LogicalType::UINT8" + LogicalType_INT8" arrow::LogicalType::INT8" + LogicalType_UINT16" arrow::LogicalType::UINT16" + LogicalType_INT16" arrow::LogicalType::INT16" + LogicalType_UINT32" arrow::LogicalType::UINT32" + LogicalType_INT32" arrow::LogicalType::INT32" + LogicalType_UINT64" arrow::LogicalType::UINT64" + LogicalType_INT64" arrow::LogicalType::INT64" + + LogicalType_FLOAT" arrow::LogicalType::FLOAT" + LogicalType_DOUBLE" arrow::LogicalType::DOUBLE" + + LogicalType_STRING" arrow::LogicalType::STRING" + + cdef cppclass CDataType" arrow::DataType": + LogicalType type + c_bool nullable + + string ToString() + + cdef cppclass CListType" arrow::ListType"(CDataType): + CListType(const shared_ptr[CDataType]& value_type, + c_bool nullable) + + cdef cppclass CStringType" arrow::StringType"(CDataType): + pass + + cdef cppclass CField" arrow::Field": + string name + shared_ptr[CDataType] type + + CField(const string& name, const shared_ptr[CDataType]& type) + + cdef cppclass CSchema" arrow::Schema": + pass + + cdef cppclass CArray" arrow::Array": + const shared_ptr[CDataType]& type() + + int32_t length() + int32_t null_count() + LogicalType logical_type() + + c_bool IsNull(int i) + + cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray): + pass + + cdef cppclass CInt8Array" arrow::Int8Array"(CArray): + pass + + cdef cppclass CListArray" arrow::ListArray"(CArray): + pass + + cdef cppclass CStringArray" arrow::StringArray"(CListArray): + pass diff --git a/python/arrow/includes/pyarrow.pxd b/python/arrow/includes/pyarrow.pxd index dcef663f3894d..5b6d87a841a17 100644 --- a/python/arrow/includes/pyarrow.pxd +++ b/python/arrow/includes/pyarrow.pxd @@ -18,6 +18,10 @@ # distutils: language = c++ from arrow.includes.common cimport * +from arrow.includes.arrow cimport LogicalType, CDataType -cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil: +cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil: pass + +cdef extern from "pyarrow/helpers.h" namespace "arrow::py" nogil: + shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable); diff --git a/python/arrow/schema.pxd b/python/arrow/schema.pxd new file mode 100644 index 0000000000000..487c246f44abf --- /dev/null +++ b/python/arrow/schema.pxd @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from arrow.includes.common cimport shared_ptr +from arrow.includes.arrow cimport CDataType, CField, CSchema + +cdef class DataType: + cdef: + shared_ptr[CDataType] sp_type + CDataType* type + + cdef init(self, const shared_ptr[CDataType]& type) + +cdef class Field: + cdef: + shared_ptr[CField] sp_field + CField* field + + cdef readonly: + DataType type + +cdef class Schema: + cdef: + shared_ptr[CSchema] sp_schema + CSchema* schema diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx new file mode 100644 index 0000000000000..6f6a2dff3547e --- /dev/null +++ b/python/arrow/schema.pyx @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +######################################## +# Data types, fields, schemas, and so forth + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True + +from arrow.compat import frombytes, tobytes +from arrow.includes.arrow cimport * +cimport arrow.includes.pyarrow as pyarrow + +cdef class DataType: + + def __cinit__(self): + pass + + cdef init(self, const shared_ptr[CDataType]& type): + self.sp_type = type + self.type = type.get() + + def __repr__(self): + return 'DataType({0})'.format(self._type_repr()) + + def _type_repr(self): + return frombytes(self.type.ToString()) + +cdef class Field: + + def __cinit__(self, object name, DataType type): + self.type = type + self.sp_field.reset(new CField(tobytes(name), type.sp_type)) + self.field = self.sp_field.get() + + def __repr__(self): + return 'Field({0}, type={1})'.format(self.name, + self.type._type_repr()) + + property name: + + def __get__(self): + return frombytes(self.field.name) + + +cdef DataType primitive_type(LogicalType type, bint nullable=True): + cdef DataType out = DataType() + out.init(pyarrow.GetPrimitiveType(type, nullable)) + return out + +#------------------------------------------------------------ +# Type factory functions + +def uint32(c_bool nullable=True): + return primitive_type(LogicalType_UINT32, nullable) + +def int32(c_bool nullable=True): + return primitive_type(LogicalType_INT32, nullable) + +def list(DataType value_type, c_bool nullable=True): + cdef DataType out = DataType() + + cdef shared_ptr[CDataType] tp + tp.reset( new CListType(value_type.sp_type, nullable)) + out.init(tp) + return out + +def struct(fields, c_bool nullable=True): + pass diff --git a/python/setup.py b/python/setup.py index f6b0a4bee8316..c67351ede2986 100644 --- a/python/setup.py +++ b/python/setup.py @@ -207,7 +207,7 @@ def get_ext_built(self, name): return name + suffix def get_cmake_cython_names(self): - return ['config', 'parquet'] + return ['config', 'parquet', 'schema'] def get_names(self): return self._found_names diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc new file mode 100644 index 0000000000000..651e77ed22fb3 --- /dev/null +++ b/python/src/pyarrow/helpers.cc @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "pyarrow/helpers.h" + +#include + +namespace arrow { + +namespace py { + +#define GET_PRIMITIVE_TYPE(NAME, Type) \ + case LogicalType::NAME: \ + if (nullable) { \ + return NAME; \ + } else { \ + return std::make_shared(nullable); \ + } \ + break; + +std::shared_ptr GetPrimitiveType(LogicalType::type type, + bool nullable) { + switch (type) { + case LogicalType::NA: + return NA; + GET_PRIMITIVE_TYPE(UINT8, UInt8Type); + GET_PRIMITIVE_TYPE(INT8, Int8Type); + GET_PRIMITIVE_TYPE(UINT32, UInt32Type); + GET_PRIMITIVE_TYPE(INT32, Int32Type); + default: + return nullptr; + } +} + +} // namespace py + +} // namespace arrow diff --git a/cpp/src/arrow/types/null.h b/python/src/pyarrow/helpers.h similarity index 77% rename from cpp/src/arrow/types/null.h rename to python/src/pyarrow/helpers.h index c67f752d40989..d4ab13eb55778 100644 --- a/cpp/src/arrow/types/null.h +++ b/python/src/pyarrow/helpers.h @@ -15,20 +15,21 @@ // specific language governing permissions and limitations // under the License. -#ifndef ARROW_TYPES_NULL_H -#define ARROW_TYPES_NULL_H +#ifndef PYARROW_HELPERS_H +#define PYARROW_HELPERS_H -#include -#include - -#include "arrow/type.h" +#include +#include namespace arrow { -struct NullType : public PrimitiveType { - PRIMITIVE_DECL(NullType, void, NA, 0, "null"); -}; +namespace py { + +std::shared_ptr GetPrimitiveType(LogicalType::type type, + bool nullable); + +} // namespace py } // namespace arrow -#endif // ARROW_TYPES_NULL_H +#endif // PYARROW_HELPERS_H