Skip to content

Commit

Permalink
ARROW-67: C++ metadata flatbuffer serialization and data movement to …
Browse files Browse the repository at this point in the history
…memory maps

Several things here:

* Add Google flatbuffers dependency
* Flatbuffers IDL draft in collaboration with @jacques-n and @StevenMPhillips
* Add Schema wrapper in Cython
* arrow::Schema conversion to/from flatbuffer representation
* Remove unneeded physical layout types from type.h
* Refactor ListType to be a nested type with a single child
* Implement shared memory round-trip for numeric row batches
* mmap-based shared memory interface and MemorySource abstract API

Quite a bit of judicious code cleaning and consolidation as part of this. For example, List types are now internally equivalent to a nested type with 1 named child field (versus a struct, which can have any number of child fields).

Associated JIRAs: ARROW-48, ARROW-57, ARROW-58

Author: Wes McKinney <[email protected]>

Closes #28 from wesm/cpp-ipc-draft and squashes the following commits:

0cef7ea [Wes McKinney] Add NullArray type now that Array is virtual, fix pyarrow build
5e841f7 [Wes McKinney] Create explicit PrimitiveArray subclasses to avoid unwanted template instantiation
6fa6319 [Wes McKinney] ARROW-28: Draft C++ shared memory IPC workflow and related refactoring / scaffolding / cleaning.
  • Loading branch information
wesm committed Mar 23, 2016
1 parent 093f9bd commit 65db0da
Show file tree
Hide file tree
Showing 88 changed files with 3,113 additions and 838 deletions.
9 changes: 8 additions & 1 deletion ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ echo $GTEST_HOME

: ${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}

cmake -DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
CMAKE_COMMON_FLAGS="-DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"

# Configure the C++ build. On Linux the ARROW_TEST_MEMCHECK option is switched
# on in addition to the common flags; every other OS gets the common flags only.
#
# "$TRAVIS_OS_NAME" is quoted and compared with the POSIX "=" operator so the
# test stays well-formed (and simply takes the else branch) when the variable
# is unset or empty. $CMAKE_COMMON_FLAGS is intentionally left unquoted: it
# holds several -D arguments that must be split into separate words.
if [ "$TRAVIS_OS_NAME" = "linux" ]; then
  cmake -DARROW_TEST_MEMCHECK=on $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" "$CPP_DIR"
else
  cmake $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" "$CPP_DIR"
fi

make -j4
make install

Expand Down
6 changes: 1 addition & 5 deletions ci/travis_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@ pushd $CPP_BUILD_DIR

make lint

# Run the tests labelled "unittest". On Linux the ctest invocation is wrapped
# in valgrind memcheck, and any reported error turns into exit code 1.
#
# "$TRAVIS_OS_NAME" is quoted and compared with the POSIX "=" operator so the
# test does not break when the variable is unset or empty.
# NOTE(review): valgrind is applied to the ctest process itself; whether the
# test executables it spawns are also checked depends on valgrind's
# child-tracing settings - confirm this is the intended coverage.
if [ "$TRAVIS_OS_NAME" = "linux" ]; then
  valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest -L unittest
else
  ctest -L unittest
fi
ctest -L unittest

popd
96 changes: 65 additions & 31 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
# Opt-in switch (default OFF). When enabled, the Parquet adapter section of
# this file requires the Parquet package and adds src/arrow/parquet.
option(ARROW_PARQUET
"Build the Parquet adapter and link to libparquet"
OFF)

# Opt-in switch (default OFF). When enabled, ADD_ARROW_TEST registers each
# unit test to run under `valgrind --tool=memcheck` and adds -DARROW_VALGRIND
# to the test target's compile flags.
option(ARROW_TEST_MEMCHECK
"Run the test suite using valgrind --tool=memcheck"
OFF)
# Enabled by default. Controls whether the googletest-based unit tests are
# built; the rest of this file checks it with if(NOT ARROW_BUILD_TESTS).
option(ARROW_BUILD_TESTS
"Build the Arrow googletest unit tests"
ON)
Expand All @@ -60,6 +62,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Build the Arrow micro benchmarks"
OFF)

# Enabled by default. When ON, the IPC section of this file requires the
# Flatbuffers package, declares an imported static `flatbuffers` target and
# adds the src/arrow/ipc subdirectory.
# NOTE(review): the src/arrow/ipc/*.cc sources are listed in ARROW_SRCS
# regardless of this option, so turning it OFF does not yet remove the
# Flatbuffers dependency from libarrow - confirm before relying on it.
option(ARROW_IPC
"Build the Arrow IPC extensions"
ON)

endif()

if(NOT ARROW_BUILD_TESTS)
Expand Down Expand Up @@ -260,17 +266,17 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
include_directories(src)

############################################################
# Benchmarking
# Benchmarking
############################################################
# Add a new micro benchmark, with or without an executable that should be built.
# If benchmarks are enabled then they will be run alongside unit tests with ctest.
# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests,
# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests,
# respectively.
#
# REL_BENCHMARK_NAME is the name of the benchmark app. It may be a single component
# (e.g. monotime-benchmark) or contain additional components (e.g.
# net/net_util-benchmark). Either way, the last component must be a globally
# unique name.
# unique name.

# The benchmark will be registered as a unit test with ctest, with a label
# of 'benchmark'.
Expand All @@ -281,7 +287,7 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
return()
endif()
get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)

if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
# This benchmark has a corresponding .cc file, set it up as an executable.
set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}")
Expand All @@ -294,7 +300,7 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
set(BENCHMARK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME})
set(NO_COLOR "")
endif()

add_test(${BENCHMARK_NAME}
${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR})
set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark")
Expand Down Expand Up @@ -345,9 +351,18 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
endif()

add_test(${TEST_NAME}
${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
# Register the test with ctest.
#  * Memcheck mode: the test is launched directly under valgrind memcheck with
#    full leak checking; any valgrind error makes the test exit with code 1.
#  * Normal mode: the test is launched through the run-test.sh wrapper.
if(ARROW_TEST_MEMCHECK)
  # Only tests backed by an executable have a CMake target of the same name.
  # set_property(TARGET ...) is a hard configure error for a non-existent
  # target, so guard it to keep script-only tests registrable.
  if(TARGET ${TEST_NAME})
    # Let the test sources detect that they are running under valgrind.
    set_property(TARGET ${TEST_NAME}
      APPEND_STRING PROPERTY
      COMPILE_FLAGS " -DARROW_VALGRIND")
  endif()
  add_test(${TEST_NAME}
    valgrind --tool=memcheck --leak-check=full --error-exitcode=1 ${TEST_PATH})
else()
  add_test(${TEST_NAME}
    ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
endif()
set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")

if(ARGN)
set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
endif()
Expand Down Expand Up @@ -403,7 +418,7 @@ if ("$ENV{GTEST_HOME}" STREQUAL "")
set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
endif()

## Google Benchmark
## Google Benchmark
if ("$ENV{GBENCHMARK_HOME}" STREQUAL "")
set(GBENCHMARK_HOME ${THIRDPARTY_DIR}/installed)
endif()
Expand Down Expand Up @@ -487,24 +502,10 @@ if (UNIX)
add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
--verbose=2
--linelength=90
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11,-runtime/references
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`)
endif (UNIX)

#----------------------------------------------------------------------
# Parquet adapter

# Optional Parquet adapter, built only when ARROW_PARQUET is ON.
if(ARROW_PARQUET)
# Parquet is mandatory once the adapter is requested.
find_package(Parquet REQUIRED)
# SYSTEM marks the Parquet headers as system headers for the compiler.
include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
# ADD_THIRDPARTY_LIB is defined elsewhere in this file; presumably it wraps
# the static/shared libraries as imported targets - TODO confirm.
ADD_THIRDPARTY_LIB(parquet
STATIC_LIB ${PARQUET_STATIC_LIB}
SHARED_LIB ${PARQUET_SHARED_LIB})

add_subdirectory(src/arrow/parquet)
# Extend the LINK_LIBS list with the adapter and libparquet.
list(APPEND LINK_LIBS arrow_parquet parquet)
endif()

############################################################
# Subdirectories
############################################################
Expand All @@ -515,15 +516,18 @@ set(LIBARROW_LINK_LIBS
set(ARROW_SRCS
src/arrow/array.cc
src/arrow/builder.cc
src/arrow/column.cc
src/arrow/schema.cc
src/arrow/table.cc
src/arrow/type.cc

src/arrow/table/column.cc
src/arrow/table/schema.cc
src/arrow/table/table.cc
# IPC / Shared memory library; to be turned into an optional component
src/arrow/ipc/adapter.cc
src/arrow/ipc/memory.cc
src/arrow/ipc/metadata.cc
src/arrow/ipc/metadata-internal.cc

src/arrow/types/construct.cc
src/arrow/types/floating.cc
src/arrow/types/integer.cc
src/arrow/types/json.cc
src/arrow/types/list.cc
src/arrow/types/primitive.cc
Expand Down Expand Up @@ -559,9 +563,39 @@ target_link_libraries(arrow ${LIBARROW_LINK_LIBS})

add_subdirectory(src/arrow)
add_subdirectory(src/arrow/util)
add_subdirectory(src/arrow/table)
add_subdirectory(src/arrow/types)

install(TARGETS arrow
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)

#----------------------------------------------------------------------
# Parquet adapter library

if(ARROW_PARQUET)
  # The adapter cannot be built without Parquet, so a missing package is fatal.
  find_package(Parquet REQUIRED)

  # Expose the Parquet headers as system headers.
  include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})

  # Register the located static and shared Parquet libraries through the
  # project's third-party helper (command names are case-insensitive).
  add_thirdparty_lib(parquet
    STATIC_LIB ${PARQUET_STATIC_LIB}
    SHARED_LIB ${PARQUET_SHARED_LIB})

  # Build the adapter sources and record both libraries in LINK_LIBS.
  add_subdirectory(src/arrow/parquet)
  list(APPEND LINK_LIBS arrow_parquet parquet)
endif()

#----------------------------------------------------------------------
# IPC library

## Flatbuffers
if(ARROW_IPC)
  # Flatbuffers is mandatory for the IPC component.
  find_package(Flatbuffers REQUIRED)

  # Echo what the find module resolved so configure logs show the toolchain.
  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
  message(STATUS "Flatbuffers static library: ${FLATBUFFERS_STATIC_LIB}")
  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")

  # Headers are added as system includes.
  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})

  # Wrap the prebuilt static archive in an imported target named `flatbuffers`.
  add_library(flatbuffers STATIC IMPORTED)
  set_property(TARGET flatbuffers
    PROPERTY IMPORTED_LOCATION ${FLATBUFFERS_STATIC_LIB})

  add_subdirectory(src/arrow/ipc)
endif()
95 changes: 95 additions & 0 deletions cpp/cmake_modules/FindFlatbuffers.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Tries to find Flatbuffers headers and libraries.
#
# Usage of this module as follows:
#
# find_package(Flatbuffers)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Flatbuffers_HOME -
# When set, this path is inspected instead of standard library locations as
# the root of the Flatbuffers installation. The environment variable
# FLATBUFFERS_HOME overrides this variable.
#
# This module defines
# FLATBUFFERS_INCLUDE_DIR, directory containing headers
# FLATBUFFERS_LIBS, directory containing flatbuffers libraries
# FLATBUFFERS_STATIC_LIB, path to libflatbuffers.a
# FLATBUFFERS_COMPILER, path to the flatc compiler executable, if found
# FLATBUFFERS_FOUND, whether flatbuffers has been found

# Choose the installation root to search. A non-empty FLATBUFFERS_HOME
# environment variable takes precedence; the Flatbuffers_HOME CMake variable
# is consulted only when the environment variable is empty or unset.
if("$ENV{FLATBUFFERS_HOME}" STREQUAL "")
  if(Flatbuffers_HOME)
    list(APPEND _flatbuffers_roots ${Flatbuffers_HOME})
  endif()
else()
  # Convert the environment value to CMake's forward-slash path form.
  file(TO_CMAKE_PATH "$ENV{FLATBUFFERS_HOME}" _native_path)
  list(APPEND _flatbuffers_roots ${_native_path})
endif()

# Try the parameterized roots, if they exist
if ( _flatbuffers_roots )
find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h
PATHS ${_flatbuffers_roots} NO_DEFAULT_PATH
PATH_SUFFIXES "include" )
find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers
PATHS ${_flatbuffers_roots} NO_DEFAULT_PATH
PATH_SUFFIXES "lib" )
else ()
find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h )
find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers )
endif ()

# Locate the flatc schema compiler.
#
# The bin/ directory of every configured installation root is searched first,
# so both the FLATBUFFERS_HOME environment variable and the Flatbuffers_HOME
# CMake variable are honoured, matching the header and library lookup. Building
# the list from _flatbuffers_roots also avoids the bogus "/bin" search entry
# that "$ENV{FLATBUFFERS_HOME}/bin" produces when the variable is unset.
# /usr/local/bin and /usr/bin remain as fallbacks; other default locations are
# still excluded via NO_DEFAULT_PATH.
set(_flatbuffers_bin_dirs)
foreach(_flatbuffers_root ${_flatbuffers_roots})
  list(APPEND _flatbuffers_bin_dirs "${_flatbuffers_root}/bin")
endforeach()

find_program(FLATBUFFERS_COMPILER flatc
  PATHS
    ${_flatbuffers_bin_dirs}
    /usr/local/bin
    /usr/bin
  NO_DEFAULT_PATH
)

# Derive the result variables. The package counts as found only when both the
# header directory and the library were located.
if(NOT FLATBUFFERS_INCLUDE_DIR OR NOT FLATBUFFERS_LIBRARIES)
  set(FLATBUFFERS_FOUND FALSE)
else()
  set(FLATBUFFERS_FOUND TRUE)
  set(FLATBUFFERS_LIB_NAME libflatbuffers)
  # Directory that holds the located library.
  get_filename_component(FLATBUFFERS_LIBS ${FLATBUFFERS_LIBRARIES} PATH)
  # NOTE(review): the static archive path is composed from the directory and a
  # fixed "libflatbuffers.a" name; its existence is not checked here.
  set(FLATBUFFERS_STATIC_LIB ${FLATBUFFERS_LIBS}/${FLATBUFFERS_LIB_NAME}.a)
endif()

# Report the outcome of the search.
#  * Found:     print a status line unless find_package() was called QUIET.
#  * Not found: abort configuration when the package is REQUIRED, even if
#               QUIET was also given; otherwise print a status line unless
#               QUIET.
if(FLATBUFFERS_FOUND)
  if(NOT Flatbuffers_FIND_QUIETLY)
    message(STATUS "Found the Flatbuffers library: ${FLATBUFFERS_LIBRARIES}")
  endif()
else()
  # Name the locations that were actually searched.
  if(_flatbuffers_roots)
    set(FLATBUFFERS_ERR_MSG
      "Could not find the Flatbuffers library. Looked in ${_flatbuffers_roots}.")
  else()
    set(FLATBUFFERS_ERR_MSG
      "Could not find the Flatbuffers library. Looked in system search paths.")
  endif()

  if(Flatbuffers_FIND_REQUIRED)
    message(FATAL_ERROR "${FLATBUFFERS_ERR_MSG}")
  elseif(NOT Flatbuffers_FIND_QUIETLY)
    message(STATUS "${FLATBUFFERS_ERR_MSG}")
  endif()
endif()

# Hide the cache entries created by find_path / find_library / find_program
# from the default cmake-gui / ccmake view.
# FLATBUFFERS_LIBS and FLATBUFFERS_STATIC_LIB are ordinary variables computed
# by this module, not cache entries, so they are not listed here; the cache
# entry that find_library() creates is FLATBUFFERS_LIBRARIES.
mark_as_advanced(
  FLATBUFFERS_INCLUDE_DIR
  FLATBUFFERS_LIBRARIES
  FLATBUFFERS_COMPILER
)
5 changes: 3 additions & 2 deletions cpp/setup_build_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

SOURCE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

./thirdparty/download_thirdparty.sh || { echo "download_thirdparty.sh failed" ; return; }
./thirdparty/build_thirdparty.sh || { echo "build_thirdparty.sh failed" ; return; }
./thirdparty/download_thirdparty.sh || { echo "download_thirdparty.sh failed" ; return; }
./thirdparty/build_thirdparty.sh || { echo "build_thirdparty.sh failed" ; return; }
source thirdparty/versions.sh

export GTEST_HOME=$SOURCE_DIR/thirdparty/$GTEST_BASEDIR
export GBENCHMARK_HOME=$SOURCE_DIR/thirdparty/installed
export FLATBUFFERS_HOME=$SOURCE_DIR/thirdparty/installed

echo "Build env initialized"
8 changes: 8 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
# Install the top-level public headers into <prefix>/include/arrow.
install(
  FILES
    api.h
    array.h
    builder.h
    column.h
    schema.h
    table.h
    type.h
  DESTINATION include/arrow)

Expand All @@ -30,3 +33,8 @@ install(FILES
set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})

ADD_ARROW_TEST(array-test)
ADD_ARROW_TEST(column-test)
ADD_ARROW_TEST(schema-test)
ADD_ARROW_TEST(table-test)

ADD_ARROW_BENCHMARK(column-benchmark)
11 changes: 5 additions & 6 deletions cpp/src/arrow/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,19 @@

#include "arrow/array.h"
#include "arrow/builder.h"
#include "arrow/column.h"
#include "arrow/schema.h"
#include "arrow/table.h"
#include "arrow/type.h"

#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"

#include "arrow/types/boolean.h"
#include "arrow/types/construct.h"
#include "arrow/types/floating.h"
#include "arrow/types/integer.h"
#include "arrow/types/list.h"
#include "arrow/types/primitive.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"

#include "arrow/util/buffer.h"
#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"

Expand Down
Loading

0 comments on commit 65db0da

Please sign in to comment.