From da31c2cbba5259c9422f2b9f28e2f788249a07b0 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 22 Jun 2017 20:18:35 -0400 Subject: [PATCH] Port over compression toolchain and interfaces from parquet-cpp, adapt to Arrow-style error handling Change-Id: I7db868884ba173c2c0c3cac0148bd5c218a52db7 --- cpp/CMakeLists.txt | 485 ++------------- cpp/cmake_modules/FindBrotli.cmake | 116 ++++ cpp/cmake_modules/FindSnappy.cmake | 94 +++ cpp/cmake_modules/FindZLIB.cmake | 105 ++++ cpp/cmake_modules/SnappyCMakeLists.txt | 85 +++ cpp/cmake_modules/SnappyConfig.h | 36 ++ cpp/cmake_modules/ThirdpartyToolchain.cmake | 631 ++++++++++++++++++++ cpp/src/arrow/python/CMakeLists.txt | 4 +- cpp/src/arrow/util/CMakeLists.txt | 6 +- cpp/src/arrow/util/compression-test.cc | 89 +++ cpp/src/arrow/util/compression.cc | 327 ++++++++++ cpp/src/arrow/util/compression.h | 109 ++++ cpp/src/arrow/util/logging.h | 7 +- 13 files changed, 1648 insertions(+), 446 deletions(-) create mode 100644 cpp/cmake_modules/FindBrotli.cmake create mode 100644 cpp/cmake_modules/FindSnappy.cmake create mode 100644 cpp/cmake_modules/FindZLIB.cmake create mode 100644 cpp/cmake_modules/SnappyCMakeLists.txt create mode 100644 cpp/cmake_modules/SnappyConfig.h create mode 100644 cpp/cmake_modules/ThirdpartyToolchain.cmake create mode 100644 cpp/src/arrow/util/compression-test.cc create mode 100644 cpp/src/arrow/util/compression.cc create mode 100644 cpp/src/arrow/util/compression.h diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5ba56e5963213..227c2007b9ee1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -136,6 +136,18 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") option(ARROW_PLASMA "Build the plasma object store along with Arrow" OFF) + + option(ARROW_ZLIB_VENDORED + "Build our own zlib (some libz.a aren't configured for static linking)" + ON) + if (MSVC) + set(BROTLI_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING + "Brotli static lib suffix used on Windows with MSVC 
(default _static)") + set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING + "Snappy static lib suffix used on Windows with MSVC (default is empty string)") + set(ZLIB_MSVC_STATIC_LIB_SUFFIX "libstatic" CACHE STRING + "Zlib static lib suffix used on Windows with MSVC (default libstatic)") + endif() endif() if(ARROW_BUILD_TESTS) @@ -168,7 +180,6 @@ endif() # Add common flags set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COMMON_FLAGS}") -set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARROW_CXXFLAGS}") message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") @@ -404,439 +415,7 @@ enable_testing() # Dependencies ############################################################ -# ---------------------------------------------------------------------- -# Thirdparty toolchain - -set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty") -set(GFLAGS_VERSION "2.1.2") -set(GTEST_VERSION "1.8.0") -set(GBENCHMARK_VERSION "1.1.0") -set(FLATBUFFERS_VERSION "1.6.0") -set(JEMALLOC_VERSION "4.4.0") - -if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "") - set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") - set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") - set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") - set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") - - if (NOT DEFINED ENV{BOOST_ROOT}) - # Since we have to set this in the environment, we check whether - # $BOOST_ROOT is defined inside here - set(ENV{BOOST_ROOT} "$ENV{ARROW_BUILD_TOOLCHAIN}") - endif() -endif() - -if (DEFINED ENV{FLATBUFFERS_HOME}) - set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}") -endif() - -if (DEFINED ENV{RAPIDJSON_HOME}) - set(RAPIDJSON_HOME "$ENV{RAPIDJSON_HOME}") -endif() - -if (DEFINED ENV{JEMALLOC_HOME}) - set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}") -endif() - -if (DEFINED ENV{GFLAGS_HOME}) - set(GFLAGS_HOME "$ENV{GFLAGS_HOME}") -endif() - -# ---------------------------------------------------------------------- -# Find pthreads - -if (NOT MSVC) - find_library(PTHREAD_LIBRARY pthread) - 
message(STATUS "Found pthread: ${PTHREAD_LIBRARY}") -endif() - -# ---------------------------------------------------------------------- -# Add Boost dependencies (code adapted from Apache Kudu (incubating)) - -set(Boost_DEBUG TRUE) -set(Boost_USE_MULTITHREADED ON) -set(Boost_ADDITIONAL_VERSIONS - "1.63.0" "1.63" - "1.62.0" "1.61" - "1.61.0" "1.62" - "1.60.0" "1.60") - -if (ARROW_BOOST_USE_SHARED) - # Find shared Boost libraries. - set(Boost_USE_STATIC_LIBS OFF) - - if(MSVC) - # disable autolinking in boost - add_definitions(-DBOOST_ALL_NO_LIB) - - # force all boost libraries to dynamic link - add_definitions(-DBOOST_ALL_DYN_LINK) - endif() - - if (ARROW_BOOST_HEADER_ONLY) - find_package(Boost) - else() - find_package(Boost COMPONENTS system filesystem REQUIRED) - if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG}) - set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG}) - else() - set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE}) - set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE}) - endif() - set(BOOST_SYSTEM_LIBRARY boost_system_shared) - set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared) - endif() -else() - # Find static boost headers and libs - # TODO Differentiate here between release and debug builds - set(Boost_USE_STATIC_LIBS ON) - if (ARROW_BOOST_HEADER_ONLY) - find_package(Boost) - else() - find_package(Boost COMPONENTS system filesystem regex REQUIRED) - if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG}) - set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG}) - else() - set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE}) - set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE}) - endif() - set(BOOST_SYSTEM_LIBRARY boost_system_static) - set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) - endif() -endif() - -message(STATUS "Boost 
include dir: " ${Boost_INCLUDE_DIRS}) -message(STATUS "Boost libraries: " ${Boost_LIBRARIES}) - -if (NOT ARROW_BOOST_HEADER_ONLY) - ADD_THIRDPARTY_LIB(boost_system - STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}" - SHARED_LIB "${BOOST_SHARED_SYSTEM_LIBRARY}") - - ADD_THIRDPARTY_LIB(boost_filesystem - STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}" - SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}") - - SET(ARROW_BOOST_LIBS boost_system boost_filesystem) -endif() - -include_directories(SYSTEM ${Boost_INCLUDE_DIR}) - -if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) - add_custom_target(unittest ctest -L unittest) - - if("$ENV{GTEST_HOME}" STREQUAL "") - if(APPLE) - set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes") - elseif(NOT MSVC) - set(GTEST_CMAKE_CXX_FLAGS "-fPIC") - endif() - string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) - set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}") - - set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep") - set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") - set(GTEST_STATIC_LIB - "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GTEST_MAIN_STATIC_LIB - "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GTEST_VENDORED 1) - set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX} - -Dgtest_force_shared_crt=ON - -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS}) - - if (CMAKE_VERSION VERSION_GREATER "3.2") - # BUILD_BYPRODUCTS is a 3.2+ feature - ExternalProject_Add(googletest_ep - URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz" - BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB} - CMAKE_ARGS ${GTEST_CMAKE_ARGS}) - else() - ExternalProject_Add(googletest_ep - URL 
"https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz" - CMAKE_ARGS ${GTEST_CMAKE_ARGS}) - endif() - else() - find_package(GTest REQUIRED) - set(GTEST_VENDORED 0) - endif() - - message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}") - message(STATUS "GTest static library: ${GTEST_STATIC_LIB}") - include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(gtest - STATIC_LIB ${GTEST_STATIC_LIB}) - ADD_THIRDPARTY_LIB(gtest_main - STATIC_LIB ${GTEST_MAIN_STATIC_LIB}) - - if(GTEST_VENDORED) - add_dependencies(gtest googletest_ep) - add_dependencies(gtest_main googletest_ep) - endif() - - # gflags (formerly Googleflags) command line parsing - if("${GFLAGS_HOME}" STREQUAL "") - set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS}) - - set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep") - set(GFLAGS_HOME "${GFLAGS_PREFIX}") - set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include") - if(MSVC) - set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/gflags_static.lib") - else() - set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a") - endif() - set(GFLAGS_VENDORED 1) - set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX} - -DBUILD_SHARED_LIBS=OFF - -DBUILD_STATIC_LIBS=ON - -DBUILD_PACKAGING=OFF - -DBUILD_TESTING=OFF - -BUILD_CONFIG_TESTS=OFF - -DINSTALL_HEADERS=ON - -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS}) - if (CMAKE_VERSION VERSION_GREATER "3.2") - # BUILD_BYPRODUCTS is a 3.2+ feature - ExternalProject_Add(gflags_ep - GIT_REPOSITORY https://github.com/gflags/gflags.git - GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee - BUILD_IN_SOURCE 1 - BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}" - CMAKE_ARGS ${GFLAGS_CMAKE_ARGS}) - else() - ExternalProject_Add(gflags_ep - GIT_REPOSITORY https://github.com/gflags/gflags.git - GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee - BUILD_IN_SOURCE 1 - CMAKE_ARGS ${GFLAGS_CMAKE_ARGS}) - endif() - else() - set(GFLAGS_VENDORED 0) - find_package(GFlags 
REQUIRED) - endif() - - message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}") - message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}") - include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(gflags - STATIC_LIB ${GFLAGS_STATIC_LIB}) - if(MSVC) - set_target_properties(gflags - PROPERTIES - IMPORTED_LINK_INTERFACE_LIBRARIES "shlwapi.lib") - endif() - - if(GFLAGS_VENDORED) - add_dependencies(gflags gflags_ep) - endif() -endif() - -if(ARROW_BUILD_BENCHMARKS) - add_custom_target(runbenchmark ctest -L benchmark) - - if("$ENV{GBENCHMARK_HOME}" STREQUAL "") - if(APPLE) - set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++") - elseif(NOT MSVC) - set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC --std=c++11") - endif() - - set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install") - set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include") - set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GBENCHMARK_VENDORED 1) - set(GBENCHMARK_CMAKE_ARGS - "-DCMAKE_BUILD_TYPE=Release" - "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}" - "-DBENCHMARK_ENABLE_TESTING=OFF" - "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}") - if (APPLE) - set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON") - endif() - if (CMAKE_VERSION VERSION_GREATER "3.2") - # BUILD_BYPRODUCTS is a 3.2+ feature - ExternalProject_Add(gbenchmark_ep - URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz" - BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}" - CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS}) - else() - ExternalProject_Add(gbenchmark_ep - URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz" - CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS}) - endif() - else() - find_package(GBenchmark REQUIRED) - set(GBENCHMARK_VENDORED 0) - endif() - - message(STATUS "GBenchmark include dir: ${GBENCHMARK_INCLUDE_DIR}") - 
message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}") - include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(benchmark - STATIC_LIB ${GBENCHMARK_STATIC_LIB}) - - if(GBENCHMARK_VENDORED) - add_dependencies(benchmark gbenchmark_ep) - endif() -endif() - -if (ARROW_IPC) - # RapidJSON, header only dependency - if("${RAPIDJSON_HOME}" STREQUAL "") - ExternalProject_Add(rapidjson_ep - PREFIX "${CMAKE_BINARY_DIR}" - URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz" - URL_MD5 "badd12c511e081fec6c89c43a7027bce" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - BUILD_IN_SOURCE 1 - INSTALL_COMMAND "") - - ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR) - set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include") - set(RAPIDJSON_VENDORED 1) - else() - set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include") - set(RAPIDJSON_VENDORED 0) - endif() - message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}") - include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR}) - - ## Flatbuffers - if("${FLATBUFFERS_HOME}" STREQUAL "") - set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install") - ExternalProject_Add(flatbuffers_ep - URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz" - CMAKE_ARGS - "-DCMAKE_CXX_FLAGS=-fPIC" - "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}" - "-DFLATBUFFERS_BUILD_TESTS=OFF") - - set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include") - set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc") - set(FLATBUFFERS_VENDORED 1) - else() - find_package(Flatbuffers REQUIRED) - set(FLATBUFFERS_VENDORED 0) - endif() - - message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}") - message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}") - include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) -endif() -#---------------------------------------------------------------------- - -if (MSVC) - # jemalloc is not supported on 
Windows - set(ARROW_JEMALLOC off) -endif() - -if (ARROW_JEMALLOC) - find_package(jemalloc) - - if(NOT JEMALLOC_FOUND) - set(ARROW_JEMALLOC_USE_SHARED OFF) - set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/") - set(JEMALLOC_HOME "${JEMALLOC_PREFIX}") - set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include") - set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}") - set(JEMALLOC_STATIC_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(JEMALLOC_VENDORED 1) - if (CMAKE_VERSION VERSION_GREATER "3.2") - # BUILD_BYPRODUCTS is a 3.2+ feature - ExternalProject_Add(jemalloc_ep - URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2 - CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=" - BUILD_IN_SOURCE 1 - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} - BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}" - INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install) - else() - ExternalProject_Add(jemalloc_ep - URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2 - CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=" - BUILD_IN_SOURCE 1 - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} - INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install) - endif() - else() - set(JEMALLOC_VENDORED 0) - endif() - - include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(jemalloc - STATIC_LIB ${JEMALLOC_STATIC_LIB} - SHARED_LIB ${JEMALLOC_SHARED_LIB} - DEPS ${PTHREAD_LIBRARY}) -endif() - -## Google PerfTools -## -## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment -## near definition of ARROW_USING_GOLD). 
-# find_package(GPerf REQUIRED) -# if (NOT "${ARROW_USE_ASAN}" AND -# NOT "${ARROW_USE_TSAN}" AND -# NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d")) -# ADD_THIRDPARTY_LIB(tcmalloc -# STATIC_LIB "${TCMALLOC_STATIC_LIB}" -# SHARED_LIB "${TCMALLOC_SHARED_LIB}") -# ADD_THIRDPARTY_LIB(profiler -# STATIC_LIB "${PROFILER_STATIC_LIB}" -# SHARED_LIB "${PROFILER_SHARED_LIB}") -# list(APPEND ARROW_BASE_LIBS tcmalloc profiler) -# add_definitions("-DTCMALLOC_ENABLED") -# set(ARROW_TCMALLOC_AVAILABLE 1) -# endif() - -######################################################################## -# HDFS thirdparty setup - -if (DEFINED ENV{HADOOP_HOME}) - set(HADOOP_HOME $ENV{HADOOP_HOME}) - if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h") - message(STATUS "Did not find hdfs.h in expected location, using vendored one") - set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") - endif() -else() - set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") -endif() - -set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h") -if (NOT EXISTS ${HDFS_H_PATH}) - message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}") -endif() -message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH}) - -include_directories(SYSTEM "${HADOOP_HOME}/include") - -############################################################ -# Linker setup -############################################################ -set(ARROW_MIN_TEST_LIBS - ${ARROW_STATIC_LINK_LIBS} - arrow_static - gtest - gtest_main - ${ARROW_BASE_LIBS}) - -if(NOT MSVC) - set(ARROW_MIN_TEST_LIBS - ${ARROW_MIN_TEST_LIBS} - ${CMAKE_DL_LIBS}) -endif() - -set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS}) - -set(ARROW_BENCHMARK_LINK_LIBS - arrow_static - arrow_benchmark_main - ${ARROW_BASE_LIBS}) +include(ThirdpartyToolchain) ############################################################ # "make ctags" target @@ -936,17 +515,40 @@ if (${CLANG_TIDY_FOUND}) endif() - - ############################################################ -# Subdirectories +# Linker and Dependencies 
############################################################ +set(ARROW_STATIC_LINK_LIBS + brotli_dec + brotli_enc + brotli_common + snappy + zlib) + +set(ARROW_MIN_TEST_LIBS + ${ARROW_STATIC_LINK_LIBS} + arrow_static + gtest + gtest_main + ${ARROW_BASE_LIBS}) + +if(NOT MSVC) + set(ARROW_MIN_TEST_LIBS + ${ARROW_MIN_TEST_LIBS} + ${CMAKE_DL_LIBS}) +endif() + +set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS}) + +set(ARROW_BENCHMARK_LINK_LIBS + arrow_static + arrow_benchmark_main + ${ARROW_BASE_LIBS}) + set(ARROW_LINK_LIBS ) -set(ARROW_STATIC_LINK_LIBS) - set(ARROW_SHARED_PRIVATE_LINK_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY}) @@ -1017,6 +619,10 @@ if(FLATBUFFERS_VENDORED) set(ARROW_DEPENDENCIES ${ARROW_DEPENDENCIES} flatbuffers_ep) endif() +############################################################ +# Subdirectories +############################################################ + if(NOT WIN32 AND ARROW_PLASMA) add_subdirectory(src/plasma) endif() @@ -1048,6 +654,7 @@ set(ARROW_SRCS src/arrow/io/memory.cc src/arrow/util/bit-util.cc + src/arrow/util/compression.cc src/arrow/util/decimal.cc src/arrow/util/key_value_metadata.cc ) diff --git a/cpp/cmake_modules/FindBrotli.cmake b/cpp/cmake_modules/FindBrotli.cmake new file mode 100644 index 0000000000000..f2e714c6fc870 --- /dev/null +++ b/cpp/cmake_modules/FindBrotli.cmake @@ -0,0 +1,116 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Tries to find Brotli headers and libraries. 
+# +# Usage of this module as follows: +# +# find_package(Brotli) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# Brotli_HOME - When set, this path is inspected instead of standard library +# locations as the root of the Brotli installation. +# The environment variable BROTLI_HOME overrides this variable. +# +# This module defines +# BROTLI_INCLUDE_DIR, directory containing headers +# BROTLI_LIBS, directory containing brotli libraries +# BROTLI_STATIC_LIB, path to libbrotli.a +# BROTLI_SHARED_LIB, path to libbrotli's shared library +# BROTLI_FOUND, whether brotli has been found + +if( NOT "${BROTLI_HOME}" STREQUAL "") + file( TO_CMAKE_PATH "${BROTLI_HOME}" _native_path ) + list( APPEND _brotli_roots ${_native_path} ) +elseif ( Brotli_HOME ) + list( APPEND _brotli_roots ${Brotli_HOME} ) +endif() + +find_path( BROTLI_INCLUDE_DIR NAMES brotli/decode.h + PATHS ${_brotli_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "include" ) + +find_library( BROTLI_LIBRARY_ENC NAMES libbrotlienc.a brotlienc + PATHS ${_brotli_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" ) + +find_library( BROTLI_LIBRARY_DEC NAMES libbrotlidec.a brotlidec + PATHS ${_brotli_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" ) + +find_library( BROTLI_LIBRARY_COMMON NAMES libbrotlicommon.a brotlicommon + PATHS ${_brotli_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" ) + +set(BROTLI_LIBRARIES ${BROTLI_LIBRARY_ENC} ${BROTLI_LIBRARY_DEC} + ${BROTLI_LIBRARY_COMMON}) + +if (BROTLI_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR BROTLI_LIBRARIES)) + set(BROTLI_FOUND TRUE) + get_filename_component( BROTLI_LIBS ${BROTLI_LIBRARY_ENC} PATH ) + set(BROTLI_LIB_NAME brotli) + if (MSVC AND NOT BROTLI_MSVC_STATIC_LIB_SUFFIX) + set(BROTLI_MSVC_STATIC_LIB_SUFFIX _static) + endif() + set(BROTLI_STATIC_LIB +
${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} + ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} + ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_STATIC_LIBRARY_ENC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_STATIC_LIBRARY_DEC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_STATIC_LIBRARY_COMMON ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_SHARED_LIB + ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${CMAKE_SHARED_LIBRARY_SUFFIX} + ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${CMAKE_SHARED_LIBRARY_SUFFIX} + ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${CMAKE_SHARED_LIBRARY_SUFFIX}) +else () + set(BROTLI_FOUND FALSE) +endif () + +if (BROTLI_FOUND) + if (NOT Brotli_FIND_QUIETLY) + if (PARQUET_MINIMAL_DEPENDENCY) + message(STATUS "Found the Brotli headers: ${BROTLI_INCLUDE_DIR}") + else () + message(STATUS "Found the Brotli library: ${BROTLI_LIBRARIES}") + endif () + endif () +else () + if (NOT Brotli_FIND_QUIETLY) + set(BROTLI_ERR_MSG "Could not find the Brotli library. 
Looked in ") + if ( _brotli_roots ) + set(BROTLI_ERR_MSG "${BROTLI_ERR_MSG}${_brotli_roots}.") + else () + set(BROTLI_ERR_MSG "${BROTLI_ERR_MSG}system search paths.") + endif () + if (Brotli_FIND_REQUIRED) + message(FATAL_ERROR "${BROTLI_ERR_MSG}") + else (Brotli_FIND_REQUIRED) + message(STATUS "${BROTLI_ERR_MSG}") + endif (Brotli_FIND_REQUIRED) + endif () +endif () + +mark_as_advanced( + BROTLI_INCLUDE_DIR + BROTLI_LIBS + BROTLI_LIBRARIES + BROTLI_STATIC_LIB + BROTLI_SHARED_LIB +) diff --git a/cpp/cmake_modules/FindSnappy.cmake b/cpp/cmake_modules/FindSnappy.cmake new file mode 100644 index 0000000000000..867963c103c4b --- /dev/null +++ b/cpp/cmake_modules/FindSnappy.cmake @@ -0,0 +1,94 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Tries to find Snappy headers and libraries. +# +# Usage of this module as follows: +# +# find_package(Snappy) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# Snappy_HOME - When set, this path is inspected instead of standard library +# locations as the root of the Snappy installation. +# The environment variable SNAPPY_HOME overrides this variable.
+# +# This module defines +# SNAPPY_INCLUDE_DIR, directory containing headers +# SNAPPY_LIBS, directory containing snappy libraries +# SNAPPY_STATIC_LIB, path to libsnappy.a +# SNAPPY_SHARED_LIB, path to libsnappy's shared library +# SNAPPY_FOUND, whether snappy has been found + +if( NOT "${SNAPPY_HOME}" STREQUAL "") + file( TO_CMAKE_PATH "${SNAPPY_HOME}" _native_path ) + list( APPEND _snappy_roots ${_native_path} ) +elseif ( Snappy_HOME ) + list( APPEND _snappy_roots ${Snappy_HOME} ) +endif() + +message(STATUS "SNAPPY_HOME: ${SNAPPY_HOME}") +find_path(SNAPPY_INCLUDE_DIR snappy.h HINTS + ${_snappy_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "include") + +find_library( SNAPPY_LIBRARIES NAMES snappy PATHS + ${_snappy_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "lib") + +if (SNAPPY_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR SNAPPY_LIBRARIES)) + set(SNAPPY_FOUND TRUE) + get_filename_component( SNAPPY_LIBS ${SNAPPY_LIBRARIES} PATH ) + set(SNAPPY_HEADER_NAME snappy.h) + set(SNAPPY_HEADER ${SNAPPY_INCLUDE_DIR}/${SNAPPY_HEADER_NAME}) + set(SNAPPY_LIB_NAME snappy) + set(SNAPPY_STATIC_LIB ${SNAPPY_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(SNAPPY_SHARED_LIB ${SNAPPY_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${SNAPPY_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) +else () + set(SNAPPY_FOUND FALSE) +endif () + +if (SNAPPY_FOUND) + if (NOT Snappy_FIND_QUIETLY) + if (PARQUET_MINIMAL_DEPENDENCY) + message(STATUS "Found the Snappy header: ${SNAPPY_HEADER}") + else () + message(STATUS "Found the Snappy library: ${SNAPPY_LIBRARIES}") + endif () + endif () +else () + if (NOT Snappy_FIND_QUIETLY) + set(SNAPPY_ERR_MSG "Could not find the Snappy library. 
Looked in ") + if ( _snappy_roots ) + set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG}${_snappy_roots}.") + else () + set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG}system search paths.") + endif () + if (Snappy_FIND_REQUIRED) + message(FATAL_ERROR "${SNAPPY_ERR_MSG}") + else (Snappy_FIND_REQUIRED) + message(STATUS "${SNAPPY_ERR_MSG}") + endif (Snappy_FIND_REQUIRED) + endif () +endif () + +mark_as_advanced( + SNAPPY_INCLUDE_DIR + SNAPPY_LIBS + SNAPPY_LIBRARIES + SNAPPY_STATIC_LIB + SNAPPY_SHARED_LIB +) diff --git a/cpp/cmake_modules/FindZLIB.cmake b/cpp/cmake_modules/FindZLIB.cmake new file mode 100644 index 0000000000000..78b84f2073ff6 --- /dev/null +++ b/cpp/cmake_modules/FindZLIB.cmake @@ -0,0 +1,105 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Tries to find ZLIB headers and libraries. +# +# Usage of this module as follows: +# +# find_package(ZLIB) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# ZLIB_HOME - When set, this path is inspected instead of standard library +# locations as the root of the ZLIB installation. +# The environment variable ZLIB_HOME overrides this variable.
+# +# - Find ZLIB (zlib.h, libz.a, libz.so, and libz.so.1) +# This module defines +# ZLIB_INCLUDE_DIR, directory containing headers +# ZLIB_LIBS, directory containing zlib libraries +# ZLIB_STATIC_LIB, path to libz.a +# ZLIB_SHARED_LIB, path to libz's shared library +# ZLIB_FOUND, whether zlib has been found + +if( NOT "${ZLIB_HOME}" STREQUAL "") + file( TO_CMAKE_PATH "${ZLIB_HOME}" _native_path ) + list( APPEND _zlib_roots ${_native_path} ) +elseif ( ZLIB_HOME ) + list( APPEND _zlib_roots ${ZLIB_HOME} ) +endif() + +# Try the parameterized roots, if they exist +if ( _zlib_roots ) + find_path( ZLIB_INCLUDE_DIR NAMES zlib.h + PATHS ${_zlib_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "include" ) + find_library( ZLIB_LIBRARIES NAMES libz.a zlib + PATHS ${_zlib_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "lib" ) +else () + find_path( ZLIB_INCLUDE_DIR NAMES zlib.h ) + # Only look for the static library + find_library( ZLIB_LIBRARIES NAMES libz.a zlib ) +endif () + + +if (ZLIB_INCLUDE_DIR AND (PARQUET_MINIMAL_DEPENDENCY OR ZLIB_LIBRARIES)) + set(ZLIB_FOUND TRUE) + get_filename_component( ZLIB_LIBS ${ZLIB_LIBRARIES} PATH ) + set(ZLIB_HEADER_NAME zlib.h) + set(ZLIB_HEADER ${ZLIB_INCLUDE_DIR}/${ZLIB_HEADER_NAME}) + set(ZLIB_LIB_NAME z) + if (MSVC) + if (NOT ZLIB_MSVC_STATIC_LIB_SUFFIX) + set(ZLIB_MSVC_STATIC_LIB_SUFFIX libstatic) + endif() + set(ZLIB_MSVC_SHARED_LIB_SUFFIX lib) + endif() + set(ZLIB_STATIC_LIB ${ZLIB_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_LIB_NAME}${ZLIB_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(ZLIB_SHARED_LIB ${ZLIB_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${ZLIB_LIB_NAME}${ZLIB_MSVC_SHARED_LIB_SUFFIX}${CMAKE_SHARED_LIBRARY_SUFFIX}) +else () + set(ZLIB_FOUND FALSE) +endif () + +if (ZLIB_FOUND) + if (NOT ZLIB_FIND_QUIETLY) + if (PARQUET_MINIMAL_DEPENDENCY) + message(STATUS "Found the ZLIB header: ${ZLIB_HEADER}") + else() + message(STATUS "Found the ZLIB library: ${ZLIB_LIBRARIES}") + endif () + endif () +else () + if (NOT ZLIB_FIND_QUIETLY) + 
set(ZLIB_ERR_MSG "Could not find the ZLIB library. Looked in ") + if ( _zlib_roots ) + set(ZLIB_ERR_MSG "${ZLIB_ERR_MSG}${_zlib_roots}.") + else () + set(ZLIB_ERR_MSG "${ZLIB_ERR_MSG}system search paths.") + endif () + if (ZLIB_FIND_REQUIRED) + message(FATAL_ERROR "${ZLIB_ERR_MSG}") + else (ZLIB_FIND_REQUIRED) + message(STATUS "${ZLIB_ERR_MSG}") + endif (ZLIB_FIND_REQUIRED) + endif () +endif () + +mark_as_advanced( + ZLIB_INCLUDE_DIR + ZLIB_LIBS + ZLIB_LIBRARIES + ZLIB_STATIC_LIB + ZLIB_SHARED_LIB +) diff --git a/cpp/cmake_modules/SnappyCMakeLists.txt b/cpp/cmake_modules/SnappyCMakeLists.txt new file mode 100644 index 0000000000000..9d0a166064ea6 --- /dev/null +++ b/cpp/cmake_modules/SnappyCMakeLists.txt @@ -0,0 +1,85 @@ +# Copyright 2008 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +PROJECT(snappy) + +INCLUDE(CheckIncludeFiles) +INCLUDE(CMakePackageConfigHelpers) + +CHECK_INCLUDE_FILES("stdint.h" HAVE_STDINT_H) +CHECK_INCLUDE_FILES("stddef.h" HAVE_STDDEF_H) +CHECK_INCLUDE_FILES("sys/uio.h" HAVE_SYS_UIO_H) + +if (NOT HAVE_SYS_UIO_H) + set(HAVE_SYS_UIO_H 0) +endif() + +if (NOT HAVE_STDINT_H) + set(HAVE_STDINT_H 0) +endif() + +if (NOT HAVE_STDDEF_H) + set(HAVE_STDDEF_H 0) +endif() + +set(ac_cv_have_stdint_h ${HAVE_STDINT_H}) +set(ac_cv_have_stddef_h ${HAVE_STDDEF_H}) +set(ac_cv_have_sys_uio_h ${HAVE_SYS_UIO_H}) +CONFIGURE_FILE(${snappy_SOURCE_DIR}/snappy-stubs-public.h.in + snappy-stubs-public.h) + +if (WIN32) + ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS) +endif() + +set(SNAPPY_SRCS snappy.cc + snappy-c.cc + snappy-stubs-internal.cc + snappy-sinksource.cc + snappy.h + snappy-c.h + snappy-sinksource.h + snappy-stubs-public.h) + +add_library(snappy SHARED ${SNAPPY_SRCS}) +add_library(snappystatic STATIC ${SNAPPY_SRCS}) + +TARGET_COMPILE_DEFINITIONS(snappy PRIVATE -DHAVE_CONFIG_H) +TARGET_COMPILE_DEFINITIONS(snappystatic PRIVATE -DHAVE_CONFIG_H) + +install(FILES snappy.h + snappy-c.h + snappy-sinksource.h + ${snappy_BINARY_DIR}/snappy-stubs-public.h + DESTINATION include) + +install(TARGETS snappy snappystatic + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) diff --git a/cpp/cmake_modules/SnappyConfig.h b/cpp/cmake_modules/SnappyConfig.h new file mode 
100644 index 0000000000000..74eb77621626b --- /dev/null +++ b/cpp/cmake_modules/SnappyConfig.h @@ -0,0 +1,36 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef SNAPPY_CONFIG_H +#define SNAPPY_CONFIG_H 1 + +#if defined(_MSC_VER) && (_MSC_VER <= 1900) +typedef __int64 ssize_t; +#endif + +#endif diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake new file mode 100644 index 0000000000000..de74efac7ff4e --- /dev/null +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -0,0 +1,631 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +# ---------------------------------------------------------------------- +# Thirdparty toolchain + +set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty") +set(GFLAGS_VERSION "2.1.2") +set(GTEST_VERSION "1.8.0") +set(GBENCHMARK_VERSION "1.1.0") +set(FLATBUFFERS_VERSION "1.6.0") +set(JEMALLOC_VERSION "4.4.0") +set(SNAPPY_VERSION "1.1.3") +set(BROTLI_VERSION "v0.6.0") + +string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) + +set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}") +set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") + +if (NOT MSVC) + # Set -fPIC on all external projects + set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC") + set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC") +endif() + +if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "") + set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(RAPIDJSON_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(SNAPPY_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(ZLIB_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + set(BROTLI_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") + + if (NOT DEFINED ENV{BOOST_ROOT}) + # Since we have to set this in the environment, we check whether + # $BOOST_ROOT is defined inside here + set(ENV{BOOST_ROOT} "$ENV{ARROW_BUILD_TOOLCHAIN}") + endif() +endif() + +if (DEFINED ENV{FLATBUFFERS_HOME}) + set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}") +endif() + +if (DEFINED ENV{RAPIDJSON_HOME}) + set(RAPIDJSON_HOME "$ENV{RAPIDJSON_HOME}") +endif() + +if (DEFINED ENV{JEMALLOC_HOME}) + set(JEMALLOC_HOME "$ENV{JEMALLOC_HOME}") +endif() + +if (DEFINED ENV{GFLAGS_HOME}) + set(GFLAGS_HOME "$ENV{GFLAGS_HOME}") +endif() + +if (DEFINED ENV{SNAPPY_HOME}) + set(SNAPPY_HOME "$ENV{SNAPPY_HOME}") +endif() + +if (DEFINED ENV{ZLIB_HOME}) + set(ZLIB_HOME "$ENV{ZLIB_HOME}") +endif() + +if (DEFINED ENV{BROTLI_HOME}) + set(BROTLI_HOME "$ENV{BROTLI_HOME}") +endif() + +# 
---------------------------------------------------------------------- +# Find pthreads + +if (NOT MSVC) + find_library(PTHREAD_LIBRARY pthread) + message(STATUS "Found pthread: ${PTHREAD_LIBRARY}") +endif() + +# ---------------------------------------------------------------------- +# Add Boost dependencies (code adapted from Apache Kudu (incubating)) + +set(Boost_DEBUG TRUE) +set(Boost_USE_MULTITHREADED ON) +set(Boost_ADDITIONAL_VERSIONS + "1.63.0" "1.63" + "1.62.0" "1.61" + "1.61.0" "1.62" + "1.60.0" "1.60") + +if (ARROW_BOOST_USE_SHARED) + # Find shared Boost libraries. + set(Boost_USE_STATIC_LIBS OFF) + + if(MSVC) + # disable autolinking in boost + add_definitions(-DBOOST_ALL_NO_LIB) + + # force all boost libraries to dynamic link + add_definitions(-DBOOST_ALL_DYN_LINK) + endif() + + if (ARROW_BOOST_HEADER_ONLY) + find_package(Boost) + else() + find_package(Boost COMPONENTS system filesystem REQUIRED) + if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") + set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG}) + set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG}) + else() + set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE}) + set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE}) + endif() + set(BOOST_SYSTEM_LIBRARY boost_system_shared) + set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared) + endif() +else() + # Find static boost headers and libs + # TODO Differentiate here between release and debug builds + set(Boost_USE_STATIC_LIBS ON) + if (ARROW_BOOST_HEADER_ONLY) + find_package(Boost) + else() + find_package(Boost COMPONENTS system filesystem regex REQUIRED) + if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") + set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG}) + set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG}) + else() + set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE}) + set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE}) + endif() + 
set(BOOST_SYSTEM_LIBRARY boost_system_static) + set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static) + endif() +endif() + +message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS}) +message(STATUS "Boost libraries: " ${Boost_LIBRARIES}) + +if (NOT ARROW_BOOST_HEADER_ONLY) + ADD_THIRDPARTY_LIB(boost_system + STATIC_LIB "${BOOST_STATIC_SYSTEM_LIBRARY}" + SHARED_LIB "${BOOST_SHARED_SYSTEM_LIBRARY}") + + ADD_THIRDPARTY_LIB(boost_filesystem + STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}" + SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}") + + SET(ARROW_BOOST_LIBS boost_system boost_filesystem) +endif() + +include_directories(SYSTEM ${Boost_INCLUDE_DIR}) + +if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) + add_custom_target(unittest ctest -L unittest) + + if("$ENV{GTEST_HOME}" STREQUAL "") + if(APPLE) + set(GTEST_CMAKE_CXX_FLAGS "-fPIC -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes") + elseif(NOT MSVC) + set(GTEST_CMAKE_CXX_FLAGS "-fPIC") + endif() + string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) + set(GTEST_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}} ${GTEST_CMAKE_CXX_FLAGS}") + + set(GTEST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/googletest_ep-prefix/src/googletest_ep") + set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") + set(GTEST_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GTEST_MAIN_STATIC_LIB + "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GTEST_VENDORED 1) + set(GTEST_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${GTEST_PREFIX} + -Dgtest_force_shared_crt=ON + -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS}) + + if (CMAKE_VERSION VERSION_GREATER "3.2") + # BUILD_BYPRODUCTS is a 3.2+ feature + ExternalProject_Add(googletest_ep + URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz" + BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} 
${GTEST_MAIN_STATIC_LIB} + CMAKE_ARGS ${GTEST_CMAKE_ARGS}) + else() + ExternalProject_Add(googletest_ep + URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz" + CMAKE_ARGS ${GTEST_CMAKE_ARGS}) + endif() + else() + find_package(GTest REQUIRED) + set(GTEST_VENDORED 0) + endif() + + message(STATUS "GTest include dir: ${GTEST_INCLUDE_DIR}") + message(STATUS "GTest static library: ${GTEST_STATIC_LIB}") + include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) + ADD_THIRDPARTY_LIB(gtest + STATIC_LIB ${GTEST_STATIC_LIB}) + ADD_THIRDPARTY_LIB(gtest_main + STATIC_LIB ${GTEST_MAIN_STATIC_LIB}) + + if(GTEST_VENDORED) + add_dependencies(gtest googletest_ep) + add_dependencies(gtest_main googletest_ep) + endif() + + # gflags (formerly Googleflags) command line parsing + if("${GFLAGS_HOME}" STREQUAL "") + set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS}) + + set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep") + set(GFLAGS_HOME "${GFLAGS_PREFIX}") + set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include") + if(MSVC) + set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/gflags_static.lib") + else() + set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a") + endif() + set(GFLAGS_VENDORED 1) + set(GFLAGS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX} + -DBUILD_SHARED_LIBS=OFF + -DBUILD_STATIC_LIBS=ON + -DBUILD_PACKAGING=OFF + -DBUILD_TESTING=OFF + -DBUILD_CONFIG_TESTS=OFF + -DINSTALL_HEADERS=ON + -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS}) + if (CMAKE_VERSION VERSION_GREATER "3.2") + # BUILD_BYPRODUCTS is a 3.2+ feature + ExternalProject_Add(gflags_ep + GIT_REPOSITORY https://github.com/gflags/gflags.git + GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}" + CMAKE_ARGS ${GFLAGS_CMAKE_ARGS}) + else() + ExternalProject_Add(gflags_ep + GIT_REPOSITORY https://github.com/gflags/gflags.git + GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee + 
BUILD_IN_SOURCE 1 + CMAKE_ARGS ${GFLAGS_CMAKE_ARGS}) + endif() + else() + set(GFLAGS_VENDORED 0) + find_package(GFlags REQUIRED) + endif() + + message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}") + message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}") + include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) + ADD_THIRDPARTY_LIB(gflags + STATIC_LIB ${GFLAGS_STATIC_LIB}) + if(MSVC) + set_target_properties(gflags + PROPERTIES + IMPORTED_LINK_INTERFACE_LIBRARIES "shlwapi.lib") + endif() + + if(GFLAGS_VENDORED) + add_dependencies(gflags gflags_ep) + endif() +endif() + +if(ARROW_BUILD_BENCHMARKS) + add_custom_target(runbenchmark ctest -L benchmark) + + if("$ENV{GBENCHMARK_HOME}" STREQUAL "") + if(APPLE) + set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++") + elseif(NOT MSVC) + set(GBENCHMARK_CMAKE_CXX_FLAGS "-fPIC --std=c++11") + endif() + + set(GBENCHMARK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark_ep/src/gbenchmark_ep-install") + set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include") + set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(GBENCHMARK_VENDORED 1) + set(GBENCHMARK_CMAKE_ARGS + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}" + "-DBENCHMARK_ENABLE_TESTING=OFF" + "-DCMAKE_CXX_FLAGS=${GBENCHMARK_CMAKE_CXX_FLAGS}") + if (APPLE) + set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON") + endif() + if (CMAKE_VERSION VERSION_GREATER "3.2") + # BUILD_BYPRODUCTS is a 3.2+ feature + ExternalProject_Add(gbenchmark_ep + URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz" + BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}" + CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS}) + else() + ExternalProject_Add(gbenchmark_ep + URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz" + CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS}) + endif() + else() + find_package(GBenchmark REQUIRED) + 
set(GBENCHMARK_VENDORED 0) + endif() + + message(STATUS "GBenchmark include dir: ${GBENCHMARK_INCLUDE_DIR}") + message(STATUS "GBenchmark static library: ${GBENCHMARK_STATIC_LIB}") + include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR}) + ADD_THIRDPARTY_LIB(benchmark + STATIC_LIB ${GBENCHMARK_STATIC_LIB}) + + if(GBENCHMARK_VENDORED) + add_dependencies(benchmark gbenchmark_ep) + endif() +endif() + +if (ARROW_IPC) + # RapidJSON, header only dependency + if("${RAPIDJSON_HOME}" STREQUAL "") + ExternalProject_Add(rapidjson_ep + PREFIX "${CMAKE_BINARY_DIR}" + URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz" + URL_MD5 "badd12c511e081fec6c89c43a7027bce" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + BUILD_IN_SOURCE 1 + INSTALL_COMMAND "") + + ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR) + set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include") + set(RAPIDJSON_VENDORED 1) + else() + set(RAPIDJSON_INCLUDE_DIR "${RAPIDJSON_HOME}/include") + set(RAPIDJSON_VENDORED 0) + endif() + message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}") + include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR}) + + ## Flatbuffers + if("${FLATBUFFERS_HOME}" STREQUAL "") + set(FLATBUFFERS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install") + ExternalProject_Add(flatbuffers_ep + URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz" + CMAKE_ARGS + "-DCMAKE_CXX_FLAGS=-fPIC" + "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}" + "-DFLATBUFFERS_BUILD_TESTS=OFF") + + set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include") + set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc") + set(FLATBUFFERS_VENDORED 1) + else() + find_package(Flatbuffers REQUIRED) + set(FLATBUFFERS_VENDORED 0) + endif() + + message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}") + message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}") + include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) +endif() 
+#---------------------------------------------------------------------- + +if (MSVC) + # jemalloc is not supported on Windows + set(ARROW_JEMALLOC off) +endif() + +if (ARROW_JEMALLOC) + find_package(jemalloc) + + if(NOT JEMALLOC_FOUND) + set(ARROW_JEMALLOC_USE_SHARED OFF) + set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/") + set(JEMALLOC_HOME "${JEMALLOC_PREFIX}") + set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include") + set(JEMALLOC_SHARED_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(JEMALLOC_STATIC_LIB "${JEMALLOC_PREFIX}/lib/libjemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(JEMALLOC_VENDORED 1) + if (CMAKE_VERSION VERSION_GREATER "3.2") + # BUILD_BYPRODUCTS is a 3.2+ feature + ExternalProject_Add(jemalloc_ep + URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2 + CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=" + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} + BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}" + INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install) + else() + ExternalProject_Add(jemalloc_ep + URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2 + CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=" + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} + INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -j1 install) + endif() + else() + set(JEMALLOC_VENDORED 0) + endif() + + include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR}) + ADD_THIRDPARTY_LIB(jemalloc + STATIC_LIB ${JEMALLOC_STATIC_LIB} + SHARED_LIB ${JEMALLOC_SHARED_LIB} + DEPS ${PTHREAD_LIBRARY}) +endif() + +## Google PerfTools +## +## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment +## near definition of ARROW_USING_GOLD). 
+# find_package(GPerf REQUIRED) +# if (NOT "${ARROW_USE_ASAN}" AND +# NOT "${ARROW_USE_TSAN}" AND +# NOT ("${ARROW_USING_GOLD}" AND "${ARROW_LINK}" STREQUAL "d")) +# ADD_THIRDPARTY_LIB(tcmalloc +# STATIC_LIB "${TCMALLOC_STATIC_LIB}" +# SHARED_LIB "${TCMALLOC_SHARED_LIB}") +# ADD_THIRDPARTY_LIB(profiler +# STATIC_LIB "${PROFILER_STATIC_LIB}" +# SHARED_LIB "${PROFILER_SHARED_LIB}") +# list(APPEND ARROW_BASE_LIBS tcmalloc profiler) +# add_definitions("-DTCMALLOC_ENABLED") +# set(ARROW_TCMALLOC_AVAILABLE 1) +# endif() + +######################################################################## +# HDFS thirdparty setup + +if (DEFINED ENV{HADOOP_HOME}) + set(HADOOP_HOME $ENV{HADOOP_HOME}) + if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h") + message(STATUS "Did not find hdfs.h in expected location, using vendored one") + set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") + endif() +else() + set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") +endif() + +set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h") +if (NOT EXISTS ${HDFS_H_PATH}) + message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}") +endif() +message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH}) + +include_directories(SYSTEM "${HADOOP_HOME}/include") + +# ---------------------------------------------------------------------- +# ZLIB + +if (NOT ARROW_ZLIB_VENDORED) + find_package(ZLIB) +endif() + +if (NOT ZLIB_FOUND) + set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") + set(ZLIB_HOME "${ZLIB_PREFIX}") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") + if (MSVC) + if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib) + else() + set(ZLIB_STATIC_LIB_NAME zlibstatic.lib) + endif() + else() + set(ZLIB_STATIC_LIB_NAME libz.a) + endif() + set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") + set(ZLIB_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX} + -DCMAKE_C_FLAGS=${EP_C_FLAGS} + -DBUILD_SHARED_LIBS=OFF) + + if 
(CMAKE_VERSION VERSION_GREATER "3.2") + set(ZLIB_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}") + endif() + ExternalProject_Add(zlib_ep + URL "http://zlib.net/fossils/zlib-1.2.8.tar.gz" + ${ZLIB_BUILD_BYPRODUCTS} + CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) + set(ZLIB_VENDORED 1) +else() + set(ZLIB_VENDORED 0) +endif() + +include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) +ADD_THIRDPARTY_LIB(zlib + STATIC_LIB ${ZLIB_STATIC_LIB}) + +if (ZLIB_VENDORED) + add_dependencies(zlib zlib_ep) +endif() + +# ---------------------------------------------------------------------- +# Snappy + +## Snappy +find_package(Snappy) +if (NOT SNAPPY_FOUND) + set(SNAPPY_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/snappy_ep/src/snappy_ep-install") + set(SNAPPY_HOME "${SNAPPY_PREFIX}") + set(SNAPPY_INCLUDE_DIR "${SNAPPY_PREFIX}/include") + if (MSVC) + set(SNAPPY_STATIC_LIB_NAME snappystatic) + else() + set(SNAPPY_STATIC_LIB_NAME snappy) + endif() + set(SNAPPY_STATIC_LIB "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(SNAPPY_SRC_URL "https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz") + + if ("${UPPERCASE_BUILD_TYPE}" STREQUAL "RELEASE") + if (APPLE) + set(SNAPPY_CXXFLAGS "CXXFLAGS=-DNDEBUG -O1") + else() + set(SNAPPY_CXXFLAGS "CXXFLAGS=-DNDEBUG -O2") + endif() + endif() + + if (CMAKE_VERSION VERSION_GREATER "3.2") + set(SNAPPY_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") + endif() + + if (MSVC) + set(SNAPPY_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" + "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") + set(SNAPPY_UPDATE_COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyCMakeLists.txt + ./CMakeLists.txt && + ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/cmake_modules/SnappyConfig.h + ./config.h) + ExternalProject_Add(snappy_ep + UPDATE_COMMAND ${SNAPPY_UPDATE_COMMAND} + 
BUILD_IN_SOURCE 1 + BUILD_COMMAND ${MAKE} + INSTALL_DIR ${SNAPPY_PREFIX} + URL ${SNAPPY_SRC_URL} + CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} + ${SNAPPY_BUILD_BYPRODUCTS}) + else() + ExternalProject_Add(snappy_ep + CONFIGURE_COMMAND ./configure --with-pic "--prefix=${SNAPPY_PREFIX}" ${SNAPPY_CXXFLAGS} + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${MAKE} + INSTALL_DIR ${SNAPPY_PREFIX} + URL ${SNAPPY_SRC_URL} + ${SNAPPY_BUILD_BYPRODUCTS}) + endif() + set(SNAPPY_VENDORED 1) +else() + set(SNAPPY_VENDORED 0) +endif() + +include_directories(SYSTEM ${SNAPPY_INCLUDE_DIR}) +ADD_THIRDPARTY_LIB(snappy + STATIC_LIB ${SNAPPY_STATIC_LIB}) + +if (SNAPPY_VENDORED) + add_dependencies(snappy snappy_ep) +endif() + +# ---------------------------------------------------------------------- +# Brotli + +find_package(Brotli) +if (NOT BROTLI_FOUND) + set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install") + set(BROTLI_HOME "${BROTLI_PREFIX}") + set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include") + if (MSVC) + set(BROTLI_LIB_DIR bin) + else() + set(BROTLI_LIB_DIR lib) + endif() + set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_COMMON "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" + -DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX} + -DCMAKE_INSTALL_LIBDIR=lib/${CMAKE_LIBRARY_ARCHITECTURE} + -DBUILD_SHARED_LIBS=OFF) + + if (CMAKE_VERSION VERSION_GREATER "3.2") + set(BROTLI_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" 
"${BROTLI_STATIC_LIBRARY_DEC}" "${BROTLI_STATIC_LIBRARY_COMMON}") + endif() + + ExternalProject_Add(brotli_ep + URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz" + ${BROTLI_BUILD_BYPRODUCTS} + CMAKE_ARGS ${BROTLI_CMAKE_ARGS} + STEP_TARGETS headers_copy) + if (MSVC) + ExternalProject_Get_Property(brotli_ep SOURCE_DIR) + + ExternalProject_Add_Step(brotli_ep headers_copy + COMMAND xcopy /E /I include ..\\..\\..\\brotli_ep\\src\\brotli_ep-install\\include /Y + DEPENDEES build + WORKING_DIRECTORY ${SOURCE_DIR}) + endif() + set(BROTLI_VENDORED 1) +else() + set(BROTLI_VENDORED 0) +endif() + +include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) +ADD_THIRDPARTY_LIB(brotli_enc + STATIC_LIB ${BROTLI_STATIC_LIBRARY_ENC}) +ADD_THIRDPARTY_LIB(brotli_dec + STATIC_LIB ${BROTLI_STATIC_LIBRARY_DEC}) +ADD_THIRDPARTY_LIB(brotli_common + STATIC_LIB ${BROTLI_STATIC_LIBRARY_COMMON}) + +if (BROTLI_VENDORED) + add_dependencies(brotli_enc brotli_ep) + add_dependencies(brotli_dec brotli_ep) + add_dependencies(brotli_common brotli_ep) +endif() diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt index 30852291d1a8d..bc2a815097d05 100644 --- a/cpp/src/arrow/python/CMakeLists.txt +++ b/cpp/src/arrow/python/CMakeLists.txt @@ -34,8 +34,8 @@ endif() set(ARROW_PYTHON_MIN_TEST_LIBS arrow_python_test_main - arrow_python_static - arrow_static) + arrow_python_shared + arrow_shared) set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS}) diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index ac7e86615eb40..279eaf5c3be6d 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -22,11 +22,12 @@ # Headers: top level install(FILES bit-util.h + key_value_metadata.h logging.h macros.h random.h + stl.h visibility.h - key_value_metadata.h DESTINATION include/arrow/util) ####################################### @@ -51,6 +52,7 @@ if (ARROW_BUILD_BENCHMARKS) endif() 
ADD_ARROW_TEST(bit-util-test) -ADD_ARROW_TEST(stl-util-test) +ADD_ARROW_TEST(compression-test) ADD_ARROW_TEST(decimal-test) ADD_ARROW_TEST(key-value-metadata-test) +ADD_ARROW_TEST(stl-util-test) diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc new file mode 100644 index 0000000000000..1a0e5d7023cf1 --- /dev/null +++ b/cpp/src/arrow/util/compression-test.cc @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <cstdint> +#include <gtest/gtest.h> +#include <string> +#include <vector> + +#include "arrow/status.h" +#include "arrow/test-common.h" +#include "arrow/util/compression.h" + +using std::string; +using std::vector; + +namespace arrow { + +template <typename T> +void CheckCodecRoundtrip(const vector<uint8_t>& data) { + // create multiple compressors to try to break them + T c1; + T c2; + + int max_compressed_len = static_cast<int>(c1.MaxCompressedLen(data.size(), &data[0])); + std::vector<uint8_t> compressed(max_compressed_len); + std::vector<uint8_t> decompressed(data.size()); + + // compress with c1 + int64_t actual_size; + ASSERT_OK(c1.Compress( + data.size(), &data[0], max_compressed_len, &compressed[0], &actual_size)); + compressed.resize(actual_size); + + // decompress with c2 + ASSERT_OK(c2.Decompress( + compressed.size(), &compressed[0], decompressed.size(), &decompressed[0])); + + ASSERT_EQ(data, decompressed); + + // compress with c2 + int64_t actual_size2; + ASSERT_OK(c2.Compress( + data.size(), &data[0], max_compressed_len, &compressed[0], &actual_size2)); + ASSERT_EQ(actual_size2, actual_size); + + // decompress with c1 + ASSERT_OK(c1.Decompress( + compressed.size(), &compressed[0], decompressed.size(), &decompressed[0])); + + ASSERT_EQ(data, decompressed); +} + +template <typename T> +void CheckCodec() { + int sizes[] = {10000, 100000}; + for (int data_size : sizes) { + vector<uint8_t> data(data_size); + test::random_bytes(data_size, 1234, data.data()); + CheckCodecRoundtrip<T>(data); + } +} + +TEST(TestCompressors, Snappy) { + CheckCodec<SnappyCodec>(); +} + +TEST(TestCompressors, Brotli) { + CheckCodec<BrotliCodec>(); +} + +TEST(TestCompressors, GZip) { + CheckCodec<GZipCodec>(); +} + +} // namespace arrow diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc new file mode 100644 index 0000000000000..7154d17beac38 --- /dev/null +++ b/cpp/src/arrow/util/compression.cc @@ -0,0 +1,327 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/compression.h" + +#include <cstring> +#include <memory> +#include <sstream> +#include <string> + +#include <brotli/decode.h> +#include <brotli/encode.h> +#include <snappy.h> +#include <zlib.h> + +#include "arrow/status.h" +#include "arrow/util/logging.h" + +namespace arrow { + +Codec::~Codec() {} + +Status Codec::Create(Compression::type codec_type, std::unique_ptr<Codec>* result) { + switch (codec_type) { + case Compression::UNCOMPRESSED: + break; + case Compression::SNAPPY: + result->reset(new SnappyCodec()); + break; + case Compression::GZIP: + result->reset(new GZipCodec()); + break; + case Compression::LZO: + return Status::NotImplemented("LZO codec not implemented"); + case Compression::BROTLI: + result->reset(new BrotliCodec()); + break; + default: + return Status::Invalid("Unrecognized codec"); + } + return Status::OK(); +} + +// ---------------------------------------------------------------------- +// gzip implementation + +// These are magic numbers from zlib.h. Not clear why they are not defined +// there. + +// Maximum window size +static constexpr int WINDOW_BITS = 15; + +// Output Gzip. +static constexpr int GZIP_CODEC = 16; + +// Determine if this is libz or gzip from header. 
+static constexpr int DETECT_CODEC = 32;
+
+/// Stateful zlib backend for GZipCodec.
+///
+/// One z_stream is shared by both directions: it is initialized for deflate
+/// or for inflate on demand, and switching direction tears down the other
+/// mode first. Alternating Compress() and Decompress() on the same instance
+/// therefore re-creates the zlib state every time the direction flips.
+class GZipCodec::GZipCodecImpl {
+ public:
+  explicit GZipCodecImpl(GZipCodec::Format format)
+      : format_(format),
+        compressor_initialized_(false),
+        decompressor_initialized_(false) {}
+
+  ~GZipCodecImpl() {
+    EndCompressor();
+    EndDecompressor();
+  }
+
+  /// Put the shared stream into deflate mode, releasing any inflate state.
+  Status InitCompressor() {
+    EndDecompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run specified format
+    int window_bits = WINDOW_BITS;
+    if (format_ == DEFLATE) {
+      window_bits = -window_bits;
+    } else if (format_ == GZIP) {
+      window_bits += GZIP_CODEC;
+    }
+    const int ret = deflateInit2(
+        &stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits, 9, Z_DEFAULT_STRATEGY);
+    if (ret != Z_OK) { return ZlibError("zlib deflateInit failed: "); }
+    compressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  /// Release deflate state if it is live; safe to call repeatedly.
+  void EndCompressor() {
+    if (compressor_initialized_) { (void)deflateEnd(&stream_); }
+    compressor_initialized_ = false;
+  }
+
+  /// Put the shared stream into inflate mode, releasing any deflate state.
+  Status InitDecompressor() {
+    EndCompressor();
+    memset(&stream_, 0, sizeof(stream_));
+
+    // Initialize to run either deflate or zlib/gzip format
+    const int window_bits =
+        format_ == DEFLATE ? -WINDOW_BITS : WINDOW_BITS | DETECT_CODEC;
+    const int ret = inflateInit2(&stream_, window_bits);
+    if (ret != Z_OK) { return ZlibError("zlib inflateInit failed: "); }
+    decompressor_initialized_ = true;
+    return Status::OK();
+  }
+
+  /// Release inflate state if it is live; safe to call repeatedly.
+  void EndDecompressor() {
+    if (decompressor_initialized_) { (void)inflateEnd(&stream_); }
+    decompressor_initialized_ = false;
+  }
+
+  /// Inflate `input` into `output` in a single step.
+  ///
+  /// \param[in] input_length number of compressed bytes at `input`
+  /// \param[in] input compressed data
+  /// \param[in] output_length capacity of `output`; it has to hold the whole
+  ///   decompressed payload because only one inflate() call is made
+  /// \param[out] output destination buffer
+  /// \return IOError when zlib fails or the buffer is too small
+  Status Decompress(int64_t input_length, const uint8_t* input, int64_t output_length,
+      uint8_t* output) {
+    if (!decompressor_initialized_) { RETURN_NOT_OK(InitDecompressor()); }
+    if (output_length == 0) {
+      // The zlib library does not allow *output to be NULL, even when output_length
+      // is 0 (inflate() will return Z_STREAM_ERROR). We don't consider this an
+      // error, so bail early if no output is expected. Note that we don't signal
+      // an error if the input actually contains compressed data.
+      return Status::OK();
+    }
+
+    // Reset the stream for this block
+    if (inflateReset(&stream_) != Z_OK) {
+      return ZlibError("zlib inflateReset failed: ");
+    }
+
+    // gzip can run in streaming mode or non-streaming mode.  We only
+    // support the non-streaming use case where we present it the entire
+    // compressed input and a buffer big enough to contain the entire
+    // decompressed output.
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_length);
+
+    // We know the output size.  In this case, we can use Z_FINISH
+    // which is more efficient.
+    const int ret = inflate(&stream_, Z_FINISH);
+    if (ret == Z_STREAM_END) { return Status::OK(); }
+
+    if (ret == Z_OK) {
+      // Failure, buffer was too small
+      std::stringstream ss;
+      ss << "Too small a buffer passed to GZipCodec. InputLength=" << input_length
+         << " OutputLength=" << output_length;
+      return Status::IOError(ss.str());
+    }
+
+    // Failure for some other reason
+    return ZlibError("GZipCodec failed: ");
+  }
+
+  /// Upper bound on the compressed size of `input_length` bytes, as computed
+  /// by deflateBound() for this stream. `input` itself is not inspected.
+  int64_t MaxCompressedLen(int64_t input_length, const uint8_t* input) {
+    // Must be in compression mode
+    if (!compressor_initialized_) {
+      Status s = InitCompressor();
+      DCHECK(s.ok());
+    }
+    // TODO(wesm): deal with zlib < 1.2.3 (see Impala codebase)
+    return static_cast<int64_t>(
+        deflateBound(&stream_, static_cast<uLong>(input_length)));
+  }
+
+  /// Deflate `input` into `output` in a single step.
+  ///
+  /// \param[in] input_length number of bytes at `input`
+  /// \param[in] input data to compress
+  /// \param[in] output_buffer_len capacity of `output`
+  /// \param[out] output destination buffer
+  /// \param[out] output_length number of bytes written to `output`
+  /// \return IOError when zlib fails or `output` is too small
+  Status Compress(int64_t input_length, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output, int64_t* output_length) {
+    if (!compressor_initialized_) { RETURN_NOT_OK(InitCompressor()); }
+    stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
+    stream_.avail_in = static_cast<uInt>(input_length);
+    stream_.next_out = reinterpret_cast<Bytef*>(output);
+    stream_.avail_out = static_cast<uInt>(output_buffer_len);
+
+    const int ret = deflate(&stream_, Z_FINISH);
+    if (ret != Z_STREAM_END) {
+      // will return Z_OK (and stream.msg NOT set) if stream.avail_out is too
+      // small. Build the error before resetting the stream below.
+      Status failure = (ret == Z_OK)
+                           ? Status::IOError("zlib deflate failed, output buffer too small")
+                           : ZlibError("zlib deflate failed: ");
+      // Discard the unfinished stream so that the next Compress() call starts
+      // from a clean state instead of continuing this one.
+      (void)deflateReset(&stream_);
+      return failure;
+    }
+
+    // Actual output length
+    const int64_t bytes_written = output_buffer_len - stream_.avail_out;
+
+    if (deflateReset(&stream_) != Z_OK) {
+      return ZlibError("zlib deflateReset failed: ");
+    }
+
+    *output_length = bytes_written;
+    return Status::OK();
+  }
+
+ private:
+  // Build an IOError whose text is `prefix` followed by zlib's diagnostic.
+  // z_stream::msg can be null, so it must be checked before it is streamed
+  // or converted to std::string.
+  Status ZlibError(const char* prefix) const {
+    std::stringstream ss;
+    ss << prefix;
+    if (stream_.msg != nullptr) { ss << stream_.msg; }
+    return Status::IOError(ss.str());
+  }
+
+  // zlib is stateful and the z_stream state variable must be initialized
+  // before use
+  z_stream stream_;
+
+  // Realistically, this will always be GZIP, but we leave the option open to
+  // configure
+  GZipCodec::Format format_;
+
+  // These variables are mutually exclusive. When the codec is in "compressor"
+  // state, compressor_initialized_ is true while decompressor_initialized_ is
+  // false. When it's decompressing, the opposite is true.
+  //
+  // Indeed, this is slightly hacky, but the alternative is having separate
+  // Compressor and Decompressor classes. If this ever becomes an issue, we can
+  // perform the refactoring then
+  bool compressor_initialized_;
+  bool decompressor_initialized_;
+};
+
+GZipCodec::GZipCodec(Format format) {
+  impl_.reset(new GZipCodecImpl(format));
+}
+
+GZipCodec::~GZipCodec() {}
+
+Status GZipCodec::Decompress(int64_t input_length, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output) {
+  return impl_->Decompress(input_length, input, output_buffer_len, output);
+}
+
+int64_t GZipCodec::MaxCompressedLen(int64_t input_length, const uint8_t* input) {
+  return impl_->MaxCompressedLen(input_length, input);
+}
+
+Status GZipCodec::Compress(int64_t input_length, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output, int64_t* output_length) {
+  return impl_->Compress(input_length, input, output_buffer_len, output, output_length);
+}
+
+const char* GZipCodec::name() const {
+  return "gzip";
+}
+
+// ----------------------------------------------------------------------
+// Snappy implementation
+
+/// Decompress a raw snappy block into `output_buffer`.
+///
+/// The decoded size is read from the snappy header first, and the call is
+/// rejected when `output_len` cannot hold it, so the destination buffer is
+/// never written past its end.
+Status SnappyCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  const char* compressed = reinterpret_cast<const char*>(input);
+  const size_t compressed_size = static_cast<size_t>(input_len);
+
+  size_t decompressed_size = 0;
+  if (!snappy::GetUncompressedLength(compressed, compressed_size, &decompressed_size)) {
+    return Status::IOError("Corrupt snappy compressed data.");
+  }
+  if (output_len < 0 || static_cast<size_t>(output_len) < decompressed_size) {
+    std::stringstream ss;
+    ss << "Too small a buffer passed to SnappyCodec. OutputLength=" << output_len
+       << " RequiredLength=" << decompressed_size;
+    return Status::IOError(ss.str());
+  }
+  if (!snappy::RawUncompress(
+          compressed, compressed_size, reinterpret_cast<char*>(output_buffer))) {
+    return Status::IOError("Corrupt snappy compressed data.");
+  }
+  return Status::OK();
+}
+
+/// Upper bound on the snappy-compressed size of `input_len` bytes.
+int64_t SnappyCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  return static_cast<int64_t>(
+      snappy::MaxCompressedLength(static_cast<size_t>(input_len)));
+}
+
+/// Compress `input` as a single raw snappy block.
+///
+/// \param[in] output_buffer_len capacity of `output_buffer`; it must be at
+///   least MaxCompressedLen(input_len, input) bytes
+/// \param[out] output_length number of bytes written to `output_buffer`
+Status SnappyCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  const size_t input_size = static_cast<size_t>(input_len);
+
+  // snappy::RawCompress() requires a destination of at least
+  // MaxCompressedLength(input_size) bytes, so reject shorter buffers here
+  // instead of letting it write past the end.
+  if (output_buffer_len < 0 ||
+      static_cast<size_t>(output_buffer_len) < snappy::MaxCompressedLength(input_size)) {
+    return Status::IOError("Too small a buffer passed to SnappyCodec.");
+  }
+
+  size_t output_len = 0;
+  snappy::RawCompress(reinterpret_cast<const char*>(input), input_size,
+      reinterpret_cast<char*>(output_buffer), &output_len);
+  *output_length = static_cast<int64_t>(output_len);
+  return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Brotli implementation
+
+/// Decompress a complete brotli stream into `output_buffer`, whose capacity
+/// is `output_len` bytes. Any decoder result other than success is reported
+/// as an IOError.
+Status BrotliCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  size_t output_size = static_cast<size_t>(output_len);
+  if (BrotliDecoderDecompress(static_cast<size_t>(input_len), input, &output_size,
+          output_buffer) != BROTLI_DECODER_RESULT_SUCCESS) {
+    return Status::IOError("Corrupt brotli compressed data.");
+  }
+  return Status::OK();
+}
+
+/// Upper bound on the brotli-compressed size of `input_len` bytes.
+int64_t BrotliCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  return static_cast<int64_t>(
+      BrotliEncoderMaxCompressedSize(static_cast<size_t>(input_len)));
+}
+
+/// Compress `input` as one brotli stream.
+///
+/// \param[in] output_buffer_len capacity of `output_buffer`
+/// \param[out] output_length number of bytes written to `output_buffer`
+Status BrotliCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  size_t output_len = static_cast<size_t>(output_buffer_len);
+  // TODO: Make quality configurable. We use 8 as a default as it is the best
+  // trade-off for Parquet workload
+  if (BrotliEncoderCompress(8, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE,
+          static_cast<size_t>(input_len), input, &output_len,
+          output_buffer) == BROTLI_FALSE) {
+    return Status::IOError("Brotli compression failure.");
+  }
+  *output_length = static_cast<int64_t>(output_len);
+  return Status::OK();
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
new file mode 100644
index 0000000000000..6886d04c7620d
--- /dev/null
+++ b/cpp/src/arrow/util/compression.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_COMPRESSION_H
+#define ARROW_UTIL_COMPRESSION_H
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// Scope for the enumeration of compression schemes understood by
+/// Codec::Create.
+struct Compression {
+  enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI };
+};
+
+/// Abstract one-shot (whole buffer in, whole buffer out) compression codec.
+class ARROW_EXPORT Codec {
+ public:
+  virtual ~Codec();
+
+  /// Create the codec for `codec`. No codec object is produced for
+  /// UNCOMPRESSED; a scheme without an implementation yields a non-OK Status.
+  static Status Create(Compression::type codec, std::unique_ptr<Codec>* out);
+
+  /// Decompress `input_len` bytes at `input` into `output_buffer`, whose
+  /// capacity is `output_len` bytes.
+  virtual Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) = 0;
+
+  /// Compress `input_len` bytes at `input` into `output_buffer`, whose
+  /// capacity is `output_buffer_len` bytes; the number of bytes produced is
+  /// stored in `*output_length`.
+  virtual Status Compress(int64_t input_len, const uint8_t* input,
+      int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) = 0;
+
+  /// Upper bound on the compressed size of an `input_len`-byte input.
+  virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
+
+  /// Short lowercase name of the codec, e.g. "snappy".
+  virtual const char* name() const = 0;
+};
+
+// Snappy codec.
+class ARROW_EXPORT SnappyCodec : public Codec {
+ public:
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  const char* name() const override { return "snappy"; }
+};
+
+// Brotli codec.
+class ARROW_EXPORT BrotliCodec : public Codec {
+ public:
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  const char* name() const override { return "brotli"; }
+};
+
+// GZip codec.
+class ARROW_EXPORT GZipCodec : public Codec {
+ public:
+  /// Compression formats supported by the zlib library
+  enum Format {
+    ZLIB,     ///< zlib-wrapped stream
+    DEFLATE,  ///< raw deflate stream without a wrapper
+    GZIP,     ///< gzip-wrapped stream (the default)
+  };
+
+  explicit GZipCodec(Format format = GZIP);
+  ~GZipCodec() override;
+
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  const char* name() const override;
+
+ private:
+  // The gzip compressor is stateful, so the zlib stream lives behind a
+  // private implementation class defined in the .cc file.
+  class GZipCodecImpl;
+  std::unique_ptr<GZipCodecImpl> impl_;
+};
+
+}  // namespace arrow
+
+#endif
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 49f1699f13623..8a929da0e0231 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -39,9 +39,10 @@ namespace arrow {
 #define ARROW_LOG_INTERNAL(level) ::arrow::internal::CerrLog(level)
 #define ARROW_LOG(level) ARROW_LOG_INTERNAL(ARROW_##level)
 
-#define ARROW_CHECK(condition)                               \
-  (condition) ? 0 : ::arrow::internal::FatalLog(ARROW_FATAL) \
-                        << __FILE__ << __LINE__ << " Check failed: " #condition " "
+#define ARROW_CHECK(condition)                           \
+  (condition) ? 0                                        \
+              : ::arrow::internal::FatalLog(ARROW_FATAL) \
+                    << __FILE__ << __LINE__ << " Check failed: " #condition " "
 
 #ifdef NDEBUG
 #define ARROW_DFATAL ARROW_WARNING