Skip to content

Commit

Permalink
ARROW-17081: [Java][Datasets] Move JNI build configuration from cpp/ …
Browse files Browse the repository at this point in the history
…to java/ (#13911)

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
kou authored Sep 6, 2022
1 parent 50a7d15 commit 7a0ba80
Show file tree
Hide file tree
Showing 23 changed files with 288 additions and 250 deletions.
3 changes: 2 additions & 1 deletion ci/docker/java-jni-manylinux-201x.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ RUN vcpkg install \
--clean-after-build \
--x-install-root=${VCPKG_ROOT}/installed \
--x-manifest-root=/arrow/ci/vcpkg \
--x-feature=dev \
--x-feature=flight \
--x-feature=gcs \
--x-feature=json \
Expand All @@ -36,7 +37,7 @@ ARG java=1.8.0
RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all
ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/

# For ci/scripts/java_*.sh
# For ci/scripts/{cpp,java}_*.sh
ENV ARROW_GANDIVA_JAVA=ON \
ARROW_HOME=/tmp/local \
ARROW_JAVA_CDATA=ON \
Expand Down
35 changes: 31 additions & 4 deletions ci/scripts/java_jni_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
set -ex

arrow_dir=${1}
build_dir=${2}/java_jni
arrow_install_dir=${2}
build_dir=${3}/java_jni
# The directory where the final binaries will be stored when scripts finish
dist_dir=${3}
dist_dir=${4}

echo "=== Clear output directories and leftovers ==="
# Clear output directories and leftovers
Expand All @@ -32,11 +33,37 @@ echo "=== Building Arrow Java C Data Interface native library ==="
mkdir -p "${build_dir}"
pushd "${build_dir}"

case "$(uname)" in
Linux)
n_jobs=$(nproc)
;;
Darwin)
n_jobs=$(sysctl -n hw.ncpu)
;;
*)
n_jobs=${NPROC:-1}
;;
esac

: ${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}}
: ${CMAKE_BUILD_TYPE:=release}
cmake \
-DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \
-DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \
-DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_PREFIX_PATH=${arrow_install_dir} \
-DCMAKE_INSTALL_PREFIX=${dist_dir} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
-GNinja \
${JAVA_JNI_CMAKE_ARGS:-} \
${arrow_dir}/java
cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release}
export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs}
cmake --build . --config ${CMAKE_BUILD_TYPE}
if [ "${ARROW_JAVA_BUILD_TESTS}" = "ON" ]; then
ctest \
--output-on-failure \
--parallel ${n_jobs} \
--timeout 300
fi
cmake --build . --config ${CMAKE_BUILD_TYPE} --target install
popd
32 changes: 15 additions & 17 deletions ci/scripts/java_jni_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ rm -rf ${build_dir}

echo "=== Building Arrow C++ libraries ==="
install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
: ${ARROW_FILESYSTEM:=ON}
: ${ARROW_GANDIVA_JAVA:=ON}
Expand All @@ -39,7 +39,6 @@ install_dir=${build_dir}/cpp-install
: ${ARROW_PARQUET:=ON}
: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
: ${ARROW_PLASMA:=ON}
: ${ARROW_PYTHON:=OFF}
: ${ARROW_S3:=ON}
: ${ARROW_USE_CCACHE:=OFF}
: ${CMAKE_BUILD_TYPE:=Release}
Expand All @@ -58,33 +57,23 @@ mkdir -p "${build_dir}/cpp"
pushd "${build_dir}/cpp"

cmake \
-DARROW_BOOST_USE_SHARED=OFF \
-DARROW_BROTLI_USE_SHARED=OFF \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
-DARROW_BUILD_UTILITIES=OFF \
-DARROW_BZ2_USE_SHARED=OFF \
-DARROW_CSV=${ARROW_DATASET} \
-DARROW_DATASET=${ARROW_DATASET} \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
-DARROW_GANDIVA=${ARROW_GANDIVA} \
-DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
-DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
-DARROW_GFLAGS_USE_SHARED=OFF \
-DARROW_GRPC_USE_SHARED=OFF \
-DARROW_JNI=ON \
-DARROW_LZ4_USE_SHARED=OFF \
-DARROW_OPENSSL_USE_SHARED=OFF \
-DARROW_ORC=${ARROW_ORC} \
-DARROW_PARQUET=${ARROW_PARQUET} \
-DARROW_PLASMA=${ARROW_PLASMA} \
-DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
-DARROW_PROTOBUF_USE_SHARED=OFF \
-DARROW_PYTHON=${ARROW_PYTHON} \
-DARROW_S3=${ARROW_S3} \
-DARROW_SNAPPY_USE_SHARED=OFF \
-DARROW_THRIFT_USE_SHARED=OFF \
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
-DARROW_UTF8PROC_USE_SHARED=OFF \
-DARROW_ZSTD_USE_SHARED=OFF \
-DAWSSDK_SOURCE=BUNDLED \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
Expand All @@ -99,14 +88,24 @@ cmake \
cmake --build . --target install

if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
ctest
# MinIO is required
exclude_tests="arrow-s3fs-test"
# unstable
exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test"
ctest \
--exclude-regex "${exclude_tests}" \
--label-regex unittest \
--output-on-failure \
--parallel $(sysctl -n hw.ncpu) \
--timeout 300
fi

popd


${arrow_dir}/ci/scripts/java_jni_build.sh \
${arrow_dir} \
${install_dir} \
${build_dir} \
${dist_dir}

Expand All @@ -117,7 +116,6 @@ fi

echo "=== Copying libraries to the distribution folder ==="
mkdir -p "${dist_dir}"
cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir}
Expand Down
39 changes: 17 additions & 22 deletions ci/scripts/java_jni_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ echo "=== Building Arrow C++ libraries ==="
devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
grep -o "^[0-9]*")
devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
: ${ARROW_GANDIVA:=ON}
: ${ARROW_GANDIVA_JAVA:=ON}
Expand All @@ -43,10 +43,9 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ
: ${ARROW_PARQUET:=ON}
: ${ARROW_PLASMA:=ON}
: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
: ${ARROW_PYTHON:=OFF}
: ${ARROW_S3:=ON}
: ${ARROW_USE_CCACHE:=OFF}
: ${CMAKE_BUILD_TYPE:=Release}
: ${CMAKE_BUILD_TYPE:=release}
: ${CMAKE_UNITY_BUILD:=ON}
: ${VCPKG_ROOT:=/opt/vcpkg}
: ${VCPKG_FEATURE_FLAGS:=-manifests}
Expand All @@ -66,36 +65,26 @@ mkdir -p "${build_dir}/cpp"
pushd "${build_dir}/cpp"

cmake \
-DARROW_BOOST_USE_SHARED=OFF \
-DARROW_BROTLI_USE_SHARED=OFF \
-DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS=ON \
-DARROW_BUILD_UTILITIES=OFF \
-DARROW_BZ2_USE_SHARED=OFF \
-DARROW_CSV=${ARROW_DATASET} \
-DARROW_DATASET=${ARROW_DATASET} \
-DARROW_DEPENDENCY_SOURCE="VCPKG" \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
-DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
-DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
-DARROW_GANDIVA=${ARROW_GANDIVA} \
-DARROW_GRPC_USE_SHARED=OFF \
-DARROW_JEMALLOC=${ARROW_JEMALLOC} \
-DARROW_JNI=ON \
-DARROW_LZ4_USE_SHARED=OFF \
-DARROW_OPENSSL_USE_SHARED=OFF \
-DARROW_ORC=${ARROW_ORC} \
-DARROW_PARQUET=${ARROW_PARQUET} \
-DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
-DARROW_PLASMA=${ARROW_PLASMA} \
-DARROW_PROTOBUF_USE_SHARED=OFF \
-DARROW_PYTHON=${ARROW_PYTHON} \
-DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
-DARROW_S3=${ARROW_S3} \
-DARROW_SNAPPY_USE_SHARED=OFF \
-DARROW_THRIFT_USE_SHARED=OFF \
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
-DARROW_UTF8PROC_USE_SHARED=OFF \
-DARROW_ZSTD_USE_SHARED=OFF \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
Expand All @@ -105,16 +94,22 @@ cmake \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
-DPythonInterp_FIND_VERSION_MAJOR=3 \
-DPythonInterp_FIND_VERSION=ON \
-DVCPKG_MANIFEST_MODE=OFF \
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
-GNinja \
${arrow_dir}/cpp
ninja install

if [ $ARROW_BUILD_TESTS = "ON" ]; then
if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
# MinIO is required
exclude_tests="arrow-s3fs-test"
# unstable
exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test"
exclude_tests="${exclude_tests}|arrow-dataset-scanner-test"
# strptime
exclude_tests="${exclude_tests}|arrow-utility-test"
ctest \
--exclude-regex "${exclude_tests}" \
--label-regex unittest \
--output-on-failure \
--parallel $(nproc) \
Expand All @@ -125,11 +120,12 @@ popd


JAVA_JNI_CMAKE_ARGS=""
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_MANIFEST_MODE=OFF"
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"
export JAVA_JNI_CMAKE_ARGS
${arrow_dir}/ci/scripts/java_jni_build.sh \
${arrow_dir} \
${ARROW_HOME} \
${build_dir} \
${dist_dir}

Expand All @@ -140,7 +136,6 @@ fi


echo "=== Copying libraries to the distribution folder ==="
cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir}
Expand Down
1 change: 1 addition & 0 deletions ci/vcpkg/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"description": "Development dependencies",
"dependencies": [
"benchmark",
"boost-process",
"gtest"
]
},
Expand Down
2 changes: 2 additions & 0 deletions cpp/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ brew "cmake"
brew "flatbuffers"
brew "git"
brew "glog"
brew "googletest"
brew "grpc"
brew "llvm"
brew "llvm@12"
Expand All @@ -39,4 +40,5 @@ brew "rapidjson"
brew "snappy"
brew "thrift"
brew "wget"
brew "xsimd"
brew "zstd"
19 changes: 16 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,19 @@ endif()
if(ARROW_S3)
list(APPEND ARROW_SHARED_LINK_LIBS ${AWSSDK_LINK_LIBRARIES})
list(APPEND ARROW_STATIC_LINK_LIBS ${AWSSDK_LINK_LIBRARIES})
if(AWSSDK_SOURCE STREQUAL "SYSTEM")
list(APPEND
ARROW_STATIC_INSTALL_INTERFACE_LIBS
aws-cpp-sdk-identity-management
aws-cpp-sdk-sts
aws-cpp-sdk-cognito-identity
aws-cpp-sdk-s3
aws-cpp-sdk-core)
elseif(AWSSDK_SOURCE STREQUAL "BUNDLED")
if(UNIX)
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl)
endif()
endif()
endif()

if(ARROW_WITH_OPENTELEMETRY)
Expand Down Expand Up @@ -851,6 +864,9 @@ add_dependencies(arrow_test_dependencies toolchain-tests)
if(ARROW_STATIC_LINK_LIBS)
add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS})
if(ARROW_HDFS OR ARROW_ORC)
if(Protobuf_SOURCE STREQUAL "SYSTEM")
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF})
endif()
if(NOT MSVC_TOOLCHAIN)
list(APPEND ARROW_STATIC_LINK_LIBS ${CMAKE_DL_LIBS})
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS})
Expand Down Expand Up @@ -977,9 +993,6 @@ if(ARROW_JNI)
if(ARROW_ORC)
add_subdirectory(../java/adapter/orc/src/main/cpp ./java/orc/jni)
endif()
if(ARROW_DATASET)
add_subdirectory(../java/dataset/src/main/cpp ./java/dataset/jni)
endif()
endif()

if(ARROW_GANDIVA)
Expand Down
50 changes: 50 additions & 0 deletions cpp/cmake_modules/FindAWSSDKAlt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# FindAWSSDKAlt
# -------------
#
# Thin wrapper around the CMake package shipped with the AWS SDK for C++.
# It forwards the version, QUIET and REQUIRED options given to
# find_package(AWSSDKAlt ...) to find_package(AWSSDK ...), requesting the
# config, s3, transfer, identity-management and sts components.
#
# Result variables:
#   AWSSDKAlt_FOUND - copy of AWSSDK_FOUND as left by find_package(AWSSDK)

set(find_package_args)
if(AWSSDKAlt_FIND_VERSION)
  list(APPEND find_package_args ${AWSSDKAlt_FIND_VERSION})
endif()
if(AWSSDKAlt_FIND_QUIETLY)
  list(APPEND find_package_args QUIET)
endif()
# In module mode CMake does not enforce REQUIRED by itself; the find module
# has to honor it. Forward it so that a missing SDK is reported as a fatal
# error by the wrapped find_package() call instead of being silently ignored.
if(AWSSDKAlt_FIND_REQUIRED)
  list(APPEND find_package_args REQUIRED)
endif()

# See https://aws.amazon.com/blogs/developer/developer-experience-of-the-aws-sdk-for-c-now-simplified-by-cmake/
# Workaround to force AWS CMake configuration to look for shared libraries.
# Only applied when the CONDA_PREFIX environment variable is defined.
if(DEFINED ENV{CONDA_PREFIX})
  if(DEFINED BUILD_SHARED_LIBS)
    set(BUILD_SHARED_LIBS_WAS_SET TRUE)
    # Quoted so that a defined-but-empty value is remembered as an empty
    # string instead of unsetting the backup variable.
    set(BUILD_SHARED_LIBS_KEEP "${BUILD_SHARED_LIBS}")
  else()
    set(BUILD_SHARED_LIBS_WAS_SET FALSE)
  endif()
  set(BUILD_SHARED_LIBS ON)
endif()

find_package(AWSSDK ${find_package_args}
             COMPONENTS config
                        s3
                        transfer
                        identity-management
                        sts)

# Restore previous value of BUILD_SHARED_LIBS
if(DEFINED ENV{CONDA_PREFIX})
  if(BUILD_SHARED_LIBS_WAS_SET)
    set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_KEEP}")
  else()
    unset(BUILD_SHARED_LIBS)
  endif()
  # Find modules run in the caller's scope; do not leak the bookkeeping
  # variables used for the save/restore above.
  unset(BUILD_SHARED_LIBS_WAS_SET)
  unset(BUILD_SHARED_LIBS_KEEP)
endif()

set(AWSSDKAlt_FOUND ${AWSSDK_FOUND})
Loading

0 comments on commit 7a0ba80

Please sign in to comment.