diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3582f29bf11..2f8687eb360 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -44,6 +44,8 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) +option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF) +option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF) option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" ON) option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF) diff --git a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake index e15f3f7e16d..8cef3e8b9d0 100644 --- a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake @@ -14,11 +14,10 @@ # limitations under the License. #============================================================================= -function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) +function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_PYTHON ENABLE_PARQUET) set(ARROW_BUILD_SHARED ON) set(ARROW_BUILD_STATIC OFF) - set(ARROW_BUILD_S3 OFF) set(CPMAddOrFindPackage CPMFindPackage) if(NOT ARROW_ARMV8_ARCH) @@ -36,10 +35,23 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) set(CPMAddOrFindPackage CPMAddPackage) endif() - if(ENABLE_S3) - set(ARROW_BUILD_S3 ON) + set(ARROW_PYTHON_OPTIONS "") + if(ENABLE_PYTHON) + list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") + # Arrow's logic to build Boost from source is busted, so we have to get it from the system. 
+ list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") + # Arrow's logic to find Thrift is busted, so we have to build it from + # source. Why can't we use `THRIFT_SOURCE BUNDLED` you might ask? + # Because that's _also_ busted. The only thing that seems to work is to set + # _all_ dependencies to bundled, then optionally un-set BOOST_SOURCE to + # SYSTEM. + list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE BUNDLED") endif() + # Set this so Arrow correctly finds the CUDA toolkit when the build machine + # does not have the CUDA driver installed. This must be an env var. + set(ENV{CUDA_LIB_PATH} "${CUDAToolkit_LIBRARY_DIR}/stubs") + cmake_language(CALL ${CPMAddOrFindPackage} NAME Arrow VERSION ${VERSION} @@ -55,7 +67,10 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) "ARROW_WITH_BACKTRACE ON" "ARROW_CXXFLAGS -w" "ARROW_JEMALLOC OFF" - "ARROW_S3 ${ARROW_BUILD_S3}" + "ARROW_S3 ${ENABLE_S3}" + # e.g. needed by blazingsql-io + "ARROW_PARQUET ${ENABLE_PARQUET}" + ${ARROW_PYTHON_OPTIONS} # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off "ARROW_USE_CCACHE OFF" "ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}" @@ -98,13 +113,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/util") file(INSTALL "${Arrow_BINARY_DIR}/src/arrow/gpu/cuda_version.h" DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/gpu") + if(ENABLE_PARQUET) + file(INSTALL "${Arrow_BINARY_DIR}/src/parquet/parquet_version.h" + DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/parquet") + endif() ### # This shouldn't be necessary! # # Arrow populates INTERFACE_INCLUDE_DIRECTORIES for the `arrow_static` # and `arrow_shared` targets in FindArrow and FindArrowCUDA respectively, # so for static source-builds, we have to do it after-the-fact. - # + # # This only works because we know exactly which components we're using. # Don't forget to update this list if we add more! 
### @@ -129,4 +148,10 @@ endfunction() set(CUDF_VERSION_Arrow 4.0.1) -find_and_configure_arrow(${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3}) +find_and_configure_arrow( + ${CUDF_VERSION_Arrow} + ${CUDF_USE_ARROW_STATIC} + ${CUDF_ENABLE_ARROW_S3} + ${CUDF_ENABLE_ARROW_PYTHON} + ${CUDF_ENABLE_ARROW_PARQUET} +)