Skip to content

Commit

Permalink
ARROW-31: Python: prototype user object model, add PyList conversion …
Browse files Browse the repository at this point in the history
…path with type inference

Depends on ARROW-7. This is pretty mundane stuff, but we have to start somewhere. I'm going to do a little more in this patch (handle normal lists of strings and lists of other supported Python types) before merging.

Author: Wes McKinney <[email protected]>

Closes #19 from wesm/ARROW-31 and squashes the following commits:

2345541 [Wes McKinney] Test basic conversion of nested lists
1d4618b [Wes McKinney] Prototype string and double converters
b02b296 [Wes McKinney] Type inference for lists and lists-of-lists
8c3891c [Wes McKinney] Smoke test that array garbage collection deallocates memory
c28bf09 [Wes McKinney] Build array successfully, without validating contents
731544a [Wes McKinney] Move PrimitiveType::ToString template back to type.h
b5b5b82 [Wes McKinney] Failing test stubs, raise on null array
edb451c [Wes McKinney] Add a few data type smoke tests
47fd78e [Wes McKinney] Add unit test stub
07c1379 [Wes McKinney] Move some bits from arrow/type.h to type.cc
3a774fb [Wes McKinney] Add Status::ToString impls. Unit test stub
4e206fc [Wes McKinney] Add pandas converter placeholder
102ed36 [Wes McKinney] Cython array box scaffold builds
94f122f [Wes McKinney] Basic object model for sequence->arrow conversions
bdb02e7 [Wes McKinney] Use shared_ptr with dynamic make_builder too
d5655ba [Wes McKinney] Clean up array builder API to return shared_ptr<Array>
4132bda [Wes McKinney] Essential scaffolding -- error handling, memory pools, etc. -- to work toward converting Python lists to Arrow arrays
55e69a2 [Wes McKinney] Typed array stubs
ac8c796 [Wes McKinney] Cache primitive data type instances
8f7edaf [Wes McKinney] Consolidate Field and data type subclasses. Add more Python stubs
ea2f3ec [Wes McKinney] Bootstrap end-to-end exposure in Python, wrap DataType and Field types
  • Loading branch information
wesm committed Mar 7, 2016
1 parent 571343b commit 9afb667
Show file tree
Hide file tree
Showing 66 changed files with 2,246 additions and 453 deletions.
83 changes: 36 additions & 47 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,17 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()

# Enable using a custom GCC toolchain to build Arrow
if (NOT "$ENV{ARROW_GCC_ROOT}" STREQUAL "")
set(GCC_ROOT $ENV{ARROW_GCC_ROOT})
set(CMAKE_C_COMPILER ${GCC_ROOT}/bin/gcc)
set(CMAKE_CXX_COMPILER ${GCC_ROOT}/bin/g++)
endif()

if(APPLE)
# In newer versions of CMake, this is the default setting
set(CMAKE_MACOSX_RPATH 1)
endif()

# Route compile and link commands through ccache when it is available.
# find_program() stores the result of the lookup in the CCACHE_FOUND cache
# variable; when ccache is not found the variable is false and the build is
# left untouched.
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
  # RULE_LAUNCH_* are GLOBAL properties, so the launcher applies to the
  # compile and link rules of every target in the build.
  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif()

# ----------------------------------------------------------------------
# cmake options

Expand Down Expand Up @@ -126,38 +125,16 @@ endif ()
# Add common flags
set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")

# Required to avoid static linking errors with dependencies
add_definitions(-fPIC)

# Determine compiler version
include(CompilerInfo)

if ("${COMPILER_FAMILY}" STREQUAL "clang")
# Clang helpfully provides a few extensions from C++11 such as the 'override'
# keyword on methods. This doesn't change behavior, and we selectively enable
# it in src/gutil/port.h only on clang. So, we can safely use it, and don't want
# to trigger warnings when we do so.
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-extensions")

# Using Clang with ccache causes a bunch of spurious warnings that are
# purportedly fixed in the next version of ccache. See the following for details:
#
# http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
# http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")

# Only hardcode -fcolor-diagnostics if stderr is opened on a terminal. Otherwise
# the color codes show up as noisy artifacts.
#
# This test is imperfect because 'cmake' and 'make' can be run independently
# (with different terminal options), and we're testing during the former.
execute_process(COMMAND test -t 2 RESULT_VARIABLE ARROW_IS_TTY)
if ((${ARROW_IS_TTY} EQUAL 0) AND (NOT ("$ENV{TERM}" STREQUAL "dumb")))
message("Running in a controlling terminal")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
else()
message("Running without a controlling terminal or in a dumb terminal")
endif()
endif()

# Sanity check linking option.
Expand Down Expand Up @@ -278,12 +255,6 @@ set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
include_directories(src)

############################################################
# Visibility
############################################################
# For generate_export_header() and add_compiler_export_flags().
include(GenerateExportHeader)

############################################################
# Testing
############################################################
Expand Down Expand Up @@ -456,21 +427,32 @@ endif()
# Subdirectories
############################################################

add_subdirectory(src/arrow)
add_subdirectory(src/arrow/util)
add_subdirectory(src/arrow/table)
add_subdirectory(src/arrow/types)

set(LINK_LIBS
arrow_util
arrow_table
arrow_types)
set(LIBARROW_LINK_LIBS
)

set(ARROW_SRCS
src/arrow/array.cc
src/arrow/builder.cc
src/arrow/field.cc
src/arrow/type.cc

src/arrow/table/column.cc
src/arrow/table/schema.cc
src/arrow/table/table.cc

src/arrow/types/construct.cc
src/arrow/types/floating.cc
src/arrow/types/integer.cc
src/arrow/types/json.cc
src/arrow/types/list.cc
src/arrow/types/primitive.cc
src/arrow/types/string.cc
src/arrow/types/struct.cc
src/arrow/types/union.cc

src/arrow/util/bit-util.cc
src/arrow/util/buffer.cc
src/arrow/util/memory-pool.cc
src/arrow/util/status.cc
)

set(LIBARROW_LINKAGE "SHARED")
Expand All @@ -479,8 +461,15 @@ add_library(arrow
${LIBARROW_LINKAGE}
${ARROW_SRCS}
)
target_link_libraries(arrow ${LINK_LIBS})
set_target_properties(arrow PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(arrow
PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
target_link_libraries(arrow ${LIBARROW_LINK_LIBS})

add_subdirectory(src/arrow)
add_subdirectory(src/arrow/util)
add_subdirectory(src/arrow/table)
add_subdirectory(src/arrow/types)

install(TARGETS arrow
LIBRARY DESTINATION lib
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ install(FILES
api.h
array.h
builder.h
field.h
type.h
DESTINATION include/arrow)

Expand Down
21 changes: 21 additions & 0 deletions cpp/src/arrow/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,28 @@
// specific language governing permissions and limitations
// under the License.

// Coarse public API while the library is in development

#ifndef ARROW_API_H
#define ARROW_API_H

#include "arrow/array.h"
#include "arrow/builder.h"
#include "arrow/type.h"

#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"

#include "arrow/types/boolean.h"
#include "arrow/types/construct.h"
#include "arrow/types/floating.h"
#include "arrow/types/integer.h"
#include "arrow/types/list.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"

#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"

#endif // ARROW_API_H
10 changes: 7 additions & 3 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Array;
class MemoryPool;
class PoolBuffer;

static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 8;
static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 5;

// Base class for all data array builders
class ArrayBuilder {
Expand Down Expand Up @@ -78,12 +78,16 @@ class ArrayBuilder {

// Creates new array object to hold the contents of the builder and transfers
// ownership of the data
virtual Status ToArray(Array** out) = 0;
virtual std::shared_ptr<Array> Finish() = 0;

const std::shared_ptr<DataType>& type() const {
return type_;
}

protected:
MemoryPool* pool_;

TypePtr type_;
std::shared_ptr<DataType> type_;

// When nulls are first appended to the builder, the null bitmap is allocated
std::shared_ptr<PoolBuffer> nulls_;
Expand Down
63 changes: 0 additions & 63 deletions cpp/src/arrow/field.h

This file was deleted.

15 changes: 0 additions & 15 deletions cpp/src/arrow/table/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,6 @@
# arrow_table
#######################################

set(TABLE_SRCS
column.cc
schema.cc
table.cc
)

set(TABLE_LIBS
)

add_library(arrow_table STATIC
${TABLE_SRCS}
)
target_link_libraries(arrow_table ${TABLE_LIBS})
SET_TARGET_PROPERTIES(arrow_table PROPERTIES LINKER_LANGUAGE CXX)

# Headers: top level
install(FILES
column.h
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/table/column-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/test-common.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/table/column.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <memory>
#include <sstream>

#include "arrow/field.h"
#include "arrow/type.h"
#include "arrow/util/status.h"

namespace arrow {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/table/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <vector>

#include "arrow/array.h"
#include "arrow/field.h"
#include "arrow/type.h"

namespace arrow {

Expand Down
9 changes: 4 additions & 5 deletions cpp/src/arrow/table/schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/table/schema.h"
#include "arrow/type.h"
#include "arrow/types/string.h"
Expand Down Expand Up @@ -97,10 +96,10 @@ TEST_F(TestSchema, ToString) {
auto schema = std::make_shared<Schema>(fields);

std::string result = schema->ToString();
std::string expected = R"(f0 ?int32
f1 uint8
f2 ?string
f3 ?list<?int16>
std::string expected = R"(f0 int32
f1 uint8 not null
f2 string
f3 list<int16>
)";

ASSERT_EQ(expected, result);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/table/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <sstream>
#include <vector>

#include "arrow/field.h"
#include "arrow/type.h"

namespace arrow {

Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/table/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/type.h"

namespace arrow {
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/table/table-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/table/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <memory>
#include <sstream>

#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/type.h"
#include "arrow/util/status.h"

namespace arrow {
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/table/test-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"
Expand Down
Loading

0 comments on commit 9afb667

Please sign in to comment.