Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17081: [Java][Datasets] Move JNI build configuration from cpp/ to java/ #13911

Merged
merged 24 commits into from
Sep 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ci/docker/java-jni-manylinux-201x.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ RUN vcpkg install \
--clean-after-build \
--x-install-root=${VCPKG_ROOT}/installed \
--x-manifest-root=/arrow/ci/vcpkg \
--x-feature=dev \
--x-feature=flight \
--x-feature=gcs \
--x-feature=json \
Expand All @@ -36,7 +37,7 @@ ARG java=1.8.0
RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all
ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/

# For ci/scripts/java_*.sh
# For ci/scripts/{cpp,java}_*.sh
ENV ARROW_GANDIVA_JAVA=ON \
ARROW_HOME=/tmp/local \
ARROW_JAVA_CDATA=ON \
Expand Down
35 changes: 31 additions & 4 deletions ci/scripts/java_jni_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
set -ex

arrow_dir=${1}
build_dir=${2}/java_jni
arrow_install_dir=${2}
build_dir=${3}/java_jni
# The directory where the final binaries will be stored when scripts finish
dist_dir=${3}
dist_dir=${4}

echo "=== Clear output directories and leftovers ==="
# Clear output directories and leftovers
Expand All @@ -32,11 +33,37 @@ echo "=== Building Arrow Java C Data Interface native library ==="
mkdir -p "${build_dir}"
pushd "${build_dir}"

case "$(uname)" in
Linux)
n_jobs=$(nproc)
;;
Darwin)
n_jobs=$(sysctl -n hw.ncpu)
;;
*)
n_jobs=${NPROC:-1}
;;
esac

: ${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}}
: ${CMAKE_BUILD_TYPE:=release}
cmake \
-DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \
-DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \
-DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_PREFIX_PATH=${arrow_install_dir} \
-DCMAKE_INSTALL_PREFIX=${dist_dir} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
-GNinja \
${JAVA_JNI_CMAKE_ARGS:-} \
${arrow_dir}/java
cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release}
export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs}
cmake --build . --config ${CMAKE_BUILD_TYPE}
if [ "${ARROW_JAVA_BUILD_TESTS}" = "ON" ]; then
ctest \
--output-on-failure \
--parallel ${n_jobs} \
--timeout 300
fi
cmake --build . --config ${CMAKE_BUILD_TYPE} --target install
popd
32 changes: 15 additions & 17 deletions ci/scripts/java_jni_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ rm -rf ${build_dir}

echo "=== Building Arrow C++ libraries ==="
install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
: ${ARROW_FILESYSTEM:=ON}
: ${ARROW_GANDIVA_JAVA:=ON}
Expand All @@ -39,7 +39,6 @@ install_dir=${build_dir}/cpp-install
: ${ARROW_PARQUET:=ON}
: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
: ${ARROW_PLASMA:=ON}
: ${ARROW_PYTHON:=OFF}
: ${ARROW_S3:=ON}
: ${ARROW_USE_CCACHE:=OFF}
: ${CMAKE_BUILD_TYPE:=Release}
Expand All @@ -58,33 +57,23 @@ mkdir -p "${build_dir}/cpp"
pushd "${build_dir}/cpp"

cmake \
-DARROW_BOOST_USE_SHARED=OFF \
-DARROW_BROTLI_USE_SHARED=OFF \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
-DARROW_BUILD_UTILITIES=OFF \
-DARROW_BZ2_USE_SHARED=OFF \
-DARROW_CSV=${ARROW_DATASET} \
-DARROW_DATASET=${ARROW_DATASET} \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
-DARROW_GANDIVA=${ARROW_GANDIVA} \
-DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
-DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
-DARROW_GFLAGS_USE_SHARED=OFF \
-DARROW_GRPC_USE_SHARED=OFF \
-DARROW_JNI=ON \
-DARROW_LZ4_USE_SHARED=OFF \
-DARROW_OPENSSL_USE_SHARED=OFF \
-DARROW_ORC=${ARROW_ORC} \
-DARROW_PARQUET=${ARROW_PARQUET} \
-DARROW_PLASMA=${ARROW_PLASMA} \
-DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
-DARROW_PROTOBUF_USE_SHARED=OFF \
-DARROW_PYTHON=${ARROW_PYTHON} \
-DARROW_S3=${ARROW_S3} \
-DARROW_SNAPPY_USE_SHARED=OFF \
-DARROW_THRIFT_USE_SHARED=OFF \
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
-DARROW_UTF8PROC_USE_SHARED=OFF \
-DARROW_ZSTD_USE_SHARED=OFF \
-DAWSSDK_SOURCE=BUNDLED \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
Expand All @@ -99,14 +88,24 @@ cmake \
cmake --build . --target install

if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
ctest
# MinIO is required
exclude_tests="arrow-s3fs-test"
# unstable
exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test"
ctest \
--exclude-regex "${exclude_tests}" \
--label-regex unittest \
--output-on-failure \
--parallel $(sysctl -n hw.ncpu) \
--timeout 300
fi

popd


${arrow_dir}/ci/scripts/java_jni_build.sh \
${arrow_dir} \
${install_dir} \
${build_dir} \
${dist_dir}

Expand All @@ -117,7 +116,6 @@ fi

echo "=== Copying libraries to the distribution folder ==="
mkdir -p "${dist_dir}"
cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir}
Expand Down
39 changes: 17 additions & 22 deletions ci/scripts/java_jni_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ echo "=== Building Arrow C++ libraries ==="
devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
grep -o "^[0-9]*")
devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
: ${ARROW_GANDIVA:=ON}
: ${ARROW_GANDIVA_JAVA:=ON}
Expand All @@ -43,10 +43,9 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ
: ${ARROW_PARQUET:=ON}
: ${ARROW_PLASMA:=ON}
: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
: ${ARROW_PYTHON:=OFF}
: ${ARROW_S3:=ON}
: ${ARROW_USE_CCACHE:=OFF}
: ${CMAKE_BUILD_TYPE:=Release}
: ${CMAKE_BUILD_TYPE:=release}
: ${CMAKE_UNITY_BUILD:=ON}
: ${VCPKG_ROOT:=/opt/vcpkg}
: ${VCPKG_FEATURE_FLAGS:=-manifests}
Expand All @@ -66,36 +65,26 @@ mkdir -p "${build_dir}/cpp"
pushd "${build_dir}/cpp"

cmake \
-DARROW_BOOST_USE_SHARED=OFF \
-DARROW_BROTLI_USE_SHARED=OFF \
-DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
-DARROW_BUILD_SHARED=OFF \
-DARROW_BUILD_TESTS=ON \
-DARROW_BUILD_UTILITIES=OFF \
-DARROW_BZ2_USE_SHARED=OFF \
-DARROW_CSV=${ARROW_DATASET} \
-DARROW_DATASET=${ARROW_DATASET} \
-DARROW_DEPENDENCY_SOURCE="VCPKG" \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
-DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
-DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
-DARROW_GANDIVA=${ARROW_GANDIVA} \
-DARROW_GRPC_USE_SHARED=OFF \
-DARROW_JEMALLOC=${ARROW_JEMALLOC} \
-DARROW_JNI=ON \
-DARROW_LZ4_USE_SHARED=OFF \
-DARROW_OPENSSL_USE_SHARED=OFF \
-DARROW_ORC=${ARROW_ORC} \
-DARROW_PARQUET=${ARROW_PARQUET} \
-DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
-DARROW_PLASMA=${ARROW_PLASMA} \
-DARROW_PROTOBUF_USE_SHARED=OFF \
-DARROW_PYTHON=${ARROW_PYTHON} \
-DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
-DARROW_S3=${ARROW_S3} \
-DARROW_SNAPPY_USE_SHARED=OFF \
-DARROW_THRIFT_USE_SHARED=OFF \
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
-DARROW_UTF8PROC_USE_SHARED=OFF \
-DARROW_ZSTD_USE_SHARED=OFF \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
Expand All @@ -105,16 +94,22 @@ cmake \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
-DPythonInterp_FIND_VERSION_MAJOR=3 \
-DPythonInterp_FIND_VERSION=ON \
-DVCPKG_MANIFEST_MODE=OFF \
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
-GNinja \
${arrow_dir}/cpp
ninja install

if [ $ARROW_BUILD_TESTS = "ON" ]; then
if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
# MinIO is required
exclude_tests="arrow-s3fs-test"
# unstable
exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test"
exclude_tests="${exclude_tests}|arrow-dataset-scanner-test"
# strptime
exclude_tests="${exclude_tests}|arrow-utility-test"
ctest \
--exclude-regex "${exclude_tests}" \
--label-regex unittest \
--output-on-failure \
--parallel $(nproc) \
Expand All @@ -125,11 +120,12 @@ popd


JAVA_JNI_CMAKE_ARGS=""
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_MANIFEST_MODE=OFF"
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"
export JAVA_JNI_CMAKE_ARGS
${arrow_dir}/ci/scripts/java_jni_build.sh \
${arrow_dir} \
${ARROW_HOME} \
${build_dir} \
${dist_dir}

Expand All @@ -140,7 +136,6 @@ fi


echo "=== Copying libraries to the distribution folder ==="
cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir}
Expand Down
1 change: 1 addition & 0 deletions ci/vcpkg/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"description": "Development dependencies",
"dependencies": [
"benchmark",
"boost-process",
"gtest"
]
},
Expand Down
2 changes: 2 additions & 0 deletions cpp/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ brew "cmake"
brew "flatbuffers"
brew "git"
brew "glog"
brew "googletest"
brew "grpc"
brew "llvm"
brew "llvm@12"
Expand All @@ -39,4 +40,5 @@ brew "rapidjson"
brew "snappy"
brew "thrift"
brew "wget"
brew "xsimd"
brew "zstd"
19 changes: 16 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,19 @@ endif()
if(ARROW_S3)
list(APPEND ARROW_SHARED_LINK_LIBS ${AWSSDK_LINK_LIBRARIES})
list(APPEND ARROW_STATIC_LINK_LIBS ${AWSSDK_LINK_LIBRARIES})
if(AWSSDK_SOURCE STREQUAL "SYSTEM")
list(APPEND
ARROW_STATIC_INSTALL_INTERFACE_LIBS
aws-cpp-sdk-identity-management
aws-cpp-sdk-sts
aws-cpp-sdk-cognito-identity
aws-cpp-sdk-s3
aws-cpp-sdk-core)
elseif(AWSSDK_SOURCE STREQUAL "BUNDLED")
if(UNIX)
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl)
endif()
endif()
endif()

if(ARROW_WITH_OPENTELEMETRY)
Expand Down Expand Up @@ -851,6 +864,9 @@ add_dependencies(arrow_test_dependencies toolchain-tests)
if(ARROW_STATIC_LINK_LIBS)
add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS})
if(ARROW_HDFS OR ARROW_ORC)
if(Protobuf_SOURCE STREQUAL "SYSTEM")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kou This seems to require protobuf when HDFS is enabled, is there any reason for it? Is it a mistake?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, good catch! It might be a mistake. Let's try removing this: #39136

list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF})
endif()
if(NOT MSVC_TOOLCHAIN)
list(APPEND ARROW_STATIC_LINK_LIBS ${CMAKE_DL_LIBS})
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS})
Expand Down Expand Up @@ -977,9 +993,6 @@ if(ARROW_JNI)
if(ARROW_ORC)
add_subdirectory(../java/adapter/orc/src/main/cpp ./java/orc/jni)
endif()
if(ARROW_DATASET)
add_subdirectory(../java/dataset/src/main/cpp ./java/dataset/jni)
endif()
endif()

if(ARROW_GANDIVA)
Expand Down
50 changes: 50 additions & 0 deletions cpp/cmake_modules/FindAWSSDKAlt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Find module wrapper around the AWS SDK for C++ package configuration.
#
# Forwards the request made through find_package(AWSSDKAlt ...) to
# find_package(AWSSDK ...) for the components listed below and reports the
# outcome as AWSSDKAlt_FOUND.
#
# Inputs (provided by find_package() before this module runs):
#   AWSSDKAlt_FIND_VERSION - requested version; forwarded when non-empty
#   AWSSDKAlt_FIND_QUIETLY - when true, QUIET is forwarded
#   ENV{CONDA_PREFIX}      - when defined, BUILD_SHARED_LIBS is forced to ON
#                            for the duration of the AWSSDK lookup only
#
# Output:
#   AWSSDKAlt_FOUND        - copy of AWSSDK_FOUND after the lookup
#
# This module is evaluated in the variable scope of the find_package() caller,
# so every scratch variable used here is unset again before returning.

set(find_package_args)
if(AWSSDKAlt_FIND_VERSION)
  list(APPEND find_package_args ${AWSSDKAlt_FIND_VERSION})
endif()
if(AWSSDKAlt_FIND_QUIETLY)
  list(APPEND find_package_args QUIET)
endif()

# See https://aws.amazon.com/blogs/developer/developer-experience-of-the-aws-sdk-for-c-now-simplified-by-cmake/
# Workaround to force AWS CMake configuration to look for shared libraries
if(DEFINED ENV{CONDA_PREFIX})
  if(DEFINED BUILD_SHARED_LIBS)
    set(BUILD_SHARED_LIBS_WAS_SET TRUE)
    # Quoted on purpose: an unquoted empty expansion would turn this into
    # set(BUILD_SHARED_LIBS_KEEP), which unsets the variable instead of
    # remembering the empty value.
    set(BUILD_SHARED_LIBS_KEEP "${BUILD_SHARED_LIBS}")
  else()
    set(BUILD_SHARED_LIBS_WAS_SET FALSE)
  endif()
  set(BUILD_SHARED_LIBS ON)
endif()

find_package(AWSSDK
             ${find_package_args}
             COMPONENTS config
                        s3
                        transfer
                        identity-management
                        sts)

# Restore previous value of BUILD_SHARED_LIBS
if(DEFINED ENV{CONDA_PREFIX})
  if(BUILD_SHARED_LIBS_WAS_SET)
    # Quoted so that a previously defined-but-empty value is restored as
    # "defined and empty" rather than being unset.
    set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_KEEP}")
  else()
    unset(BUILD_SHARED_LIBS)
  endif()
  # Do not leave the bookkeeping variables behind in the caller's scope.
  unset(BUILD_SHARED_LIBS_WAS_SET)
  unset(BUILD_SHARED_LIBS_KEEP)
endif()

set(AWSSDKAlt_FOUND ${AWSSDK_FOUND})

# Scratch list is no longer needed; avoid leaking a generically named
# variable into the scope that called find_package(AWSSDKAlt).
unset(find_package_args)
Loading