From bd02afadf7dec00a3a93fe7bbde06c443590220a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 09:06:22 +0900 Subject: [PATCH] ARROW-17451: [CI][Java] Use manylinux2014 image for JNI (#13920) Because our official .jar packages are built in manylinux2014 image. Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/java_jni.yml | 12 +++--- ci/docker/java-jni-manylinux-201x.dockerfile | 10 ++++- ci/scripts/java_build.sh | 7 +++- ci/scripts/java_jni_macos_build.sh | 25 +++++++++--- ci/scripts/java_jni_manylinux_build.sh | 26 ++++++++++--- ci/scripts/java_test.sh | 9 +++-- cpp/src/plasma/CMakeLists.txt | 10 ++--- dev/tasks/java-jars/github.yml | 11 +++++- docker-compose.yml | 40 +++----------------- 9 files changed, 87 insertions(+), 63 deletions(-) diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 07cc3b1265212..64afc2de3e581 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -47,7 +47,7 @@ env: jobs: docker: - name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset) + name: AMD64 manylinux2014 Java JNI runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 90 @@ -61,8 +61,8 @@ jobs: uses: actions/cache@v2 with: path: .docker - key: maven-${{ hashFiles('java/**') }} - restore-keys: maven- + key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} + restore-keys: java-jni-manylinux-2014- - name: Setup Python uses: actions/setup-python@v4 with: @@ -70,14 +70,14 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build - run: archery docker run debian-java-jni + run: archery docker run java-jni-manylinux-2014 - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' continue-on-error: true - run: archery docker push debian-java-jni + run: archery docker push java-jni-manylinux-2014 docker_integration_python: - name: AMD64 Debian 9 Java C Data Interface Integration + name: AMD64 Conda Java C Data Interface Integration runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 90 diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index 52bdb9b923dec..de953fd5ae057 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -33,5 +33,13 @@ RUN vcpkg install \ # Install Java ARG java=1.8.0 -RUN yum install -y java-$java-openjdk-devel && yum clean all +RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/ + +# For ci/scripts/java_*.sh +ENV ARROW_GANDIVA_JAVA=ON \ + ARROW_HOME=/tmp/local \ + ARROW_JAVA_CDATA=ON \ + ARROW_JNI=ON \ + ARROW_PLASMA=ON \ + ARROW_USE_CCACHE=ON diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 120d04ffc2df9..ac252f55b37b5 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -18,10 +18,13 @@ set -ex +if [[ "${ARROW_JAVA_BUILD:-ON}" != "ON" ]]; then + exit +fi + arrow_dir=${1} source_dir=${1}/java build_dir=${2} -cpp_build_dir=${build_dir}/cpp/${ARROW_BUILD_TYPE:-debug} java_jni_dist_dir=${3} : ${BUILD_DOCS_JAVA:=OFF} @@ -85,7 +88,7 @@ if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then fi if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then - ${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install + ${mvn} -Darrow.cpp.build.dir=${java_jni_dist_dir} -Parrow-jni install fi if [ "${ARROW_PLASMA}" = "ON" ]; then diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 590c469e398ff..5418daaf0113b 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -29,6 +29,7 @@ echo "=== Clear output directories and leftovers ===" rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" +install_dir=${build_dir}/cpp-install : ${ARROW_BUILD_TESTS:=OFF} : ${ARROW_DATASET:=ON} : ${ARROW_FILESYSTEM:=ON} @@ -40,9 +41,15 @@ echo "=== Building Arrow C++ libraries ===" : ${ARROW_PLASMA:=ON} : ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} +: ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=Release} : ${CMAKE_UNITY_BUILD:=ON} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics before build ===" + ccache -s +fi + export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE @@ -75,12 +82,13 @@ cmake \ -DARROW_S3=${ARROW_S3} \ -DARROW_SNAPPY_USE_SHARED=OFF \ -DARROW_THRIFT_USE_SHARED=OFF \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ -DARROW_UTF8PROC_USE_SHARED=OFF \ -DARROW_ZSTD_USE_SHARED=OFF \ -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \ + -DCMAKE_INSTALL_PREFIX=${install_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ @@ -102,12 +110,17 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${build_dir} \ ${dist_dir} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics after build ===" + ccache -s +fi echo "=== Copying libraries to the distribution folder ===" mkdir -p "${dist_dir}" -cp -L ${build_dir}/cpp/lib/libgandiva_jni.dylib ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.dylib ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.dylib ${dist_dir} +cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir} +cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir} +cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir} +cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir} echo "=== Checking shared dependencies for libraries ===" @@ -122,9 +135,11 @@ archery linking check-dependencies \ --allow libcurl \ --allow libgandiva_jni \ --allow libncurses \ + --allow libplasma_java \ --allow libz \ libarrow_cdata_jni.dylib \ libarrow_dataset_jni.dylib \ libarrow_orc_jni.dylib \ - libgandiva_jni.dylib + libgandiva_jni.dylib \ + libplasma_java.dylib popd diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 008f19140ee0e..331d74b34a1f4 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -32,6 +32,7 @@ echo "=== Building Arrow C++ libraries ===" devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ grep -o "^[0-9]*") devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" +: ${ARROW_BUILD_TESTS:=OFF} : ${ARROW_DATASET:=ON} : ${ARROW_GANDIVA:=ON} : ${ARROW_GANDIVA_JAVA:=ON} @@ -44,7 +45,7 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${ARROW_PLASMA_JAVA_CLIENT:=ON} : ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} -: ${ARROW_BUILD_TESTS:=OFF} +: ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=Release} : ${CMAKE_UNITY_BUILD:=ON} : ${VCPKG_ROOT:=/opt/vcpkg} @@ -52,6 +53,11 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} : ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics before build ===" + ccache -s +fi + export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE @@ -87,11 +93,12 @@ cmake \ -DARROW_S3=${ARROW_S3} \ -DARROW_SNAPPY_USE_SHARED=OFF \ -DARROW_THRIFT_USE_SHARED=OFF \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ -DARROW_UTF8PROC_USE_SHARED=OFF \ -DARROW_ZSTD_USE_SHARED=OFF \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \ + -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DORC_SOURCE=BUNDLED \ -DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \ @@ -126,11 +133,17 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${build_dir} \ ${dist_dir} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics after build ===" + ccache -s +fi + echo "=== Copying libraries to the distribution folder ===" -cp -L ${build_dir}/cpp/lib/libgandiva_jni.so ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.so ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.so ${dist_dir} +cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir} +cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir} +cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir} +cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir} echo "=== Checking shared dependencies for libraries ===" @@ -149,5 +162,6 @@ archery linking check-dependencies \ libarrow_cdata_jni.so \ libarrow_dataset_jni.so \ libarrow_orc_jni.so \ - libgandiva_jni.so + libgandiva_jni.so \ + libplasma_java.so popd diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh index 83ef26fdb1a51..bb30894d9eff0 100755 --- a/ci/scripts/java_test.sh +++ b/ci/scripts/java_test.sh @@ -18,9 +18,12 @@ set -ex +if [[ "${ARROW_JAVA_TEST:-ON}" != "ON" ]]; then + exit +fi + arrow_dir=${1} source_dir=${1}/java -cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} java_jni_dist_dir=${3} # For JNI and Plasma tests @@ -36,7 +39,7 @@ pushd ${source_dir} ${mvn} test if [ "${ARROW_JNI}" = "ON" ]; then - ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir} + ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${java_jni_dist_dir} fi if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then @@ -46,7 +49,7 @@ fi if [ "${ARROW_PLASMA}" = "ON" ]; then pushd ${source_dir}/plasma java -cp target/test-classes:target/classes \ - -Djava.library.path=${cpp_build_dir} \ + -Djava.library.path=${java_jni_dist_dir} \ org.apache.arrow.plasma.PlasmaClientTest popd fi diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt index d78a5ccfdccbe..2573693738590 100644 --- a/cpp/src/plasma/CMakeLists.txt +++ b/cpp/src/plasma/CMakeLists.txt @@ -185,14 +185,14 @@ if(ARROW_PLASMA_JAVA_CLIENT) if(APPLE) target_link_libraries(plasma_java - plasma_shared - ${PLASMA_LINK_LIBS} + plasma_static + ${PLASMA_STATIC_LINK_LIBS} "-undefined dynamic_lookup" ${PTHREAD_LIBRARY}) - else(APPLE) - target_link_libraries(plasma_java plasma_shared ${PLASMA_LINK_LIBS} + else() + target_link_libraries(plasma_java plasma_static ${PLASMA_STATIC_LINK_LIBS} ${PTHREAD_LIBRARY}) - endif(APPLE) + endif() endif() # # Unit tests diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 376ba78fe14db..23b97087c394b 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -28,7 +28,11 @@ jobs: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} - name: Build C++ Libs - run: archery docker run java-jni-manylinux-2014 + run: | + archery docker run \ + -e ARROW_JAVA_BUILD=OFF \ + -e ARROW_JAVA_TEST=OFF \ + java-jni-manylinux-2014 - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/ - name: Upload Artifacts @@ -61,6 +65,9 @@ jobs: # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. brew uninstall aws-sdk-cpp + - name: Setup ccache + run: | + arrow/ci/scripts/ccache_setup.sh - name: Build C++ Libs run: | set -e @@ -100,11 +107,13 @@ jobs: test -f arrow/java-dist/libarrow_cdata_jni.dylib test -f arrow/java-dist/libarrow_dataset_jni.dylib test -f arrow/java-dist/libgandiva_jni.dylib + test -f arrow/java-dist/libplasma_java.dylib test -f arrow/java-dist/libarrow_orc_jni.dylib test -f arrow/java-dist/libarrow_cdata_jni.so test -f arrow/java-dist/libarrow_dataset_jni.so test -f arrow/java-dist/libarrow_orc_jni.so test -f arrow/java-dist/libgandiva_jni.so + test -f arrow/java-dist/libplasma_java.so - name: Build Bundled Jar run: | set -e diff --git a/docker-compose.yml b/docker-compose.yml index 9ab6c0dd14873..751a81fa5540a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -122,7 +122,6 @@ x-hierarchy: - debian-go-cgo - debian-go-cgo-python - debian-java - - debian-java-jni - debian-js - fedora-cpp: - fedora-python @@ -999,10 +998,14 @@ services: <<: *ccache volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated command: - ["pip install -e /arrow/dev/archery && - /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist"] + ["pip install -e /arrow/dev/archery && \ + /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \ + source /opt/rh/rh-maven35/enable && \ + /arrow/ci/scripts/java_build.sh /arrow /build /arrow/java-dist && \ + /arrow/ci/scripts/java_test.sh /arrow /build /arrow/java-dist"] ############################## Integration ################################# @@ -1541,37 +1544,6 @@ services: /arrow/ci/scripts/java_build.sh /arrow /build && /arrow/ci/scripts/java_test.sh /arrow /build" - debian-java-jni: - # Includes plasma test, jni for gandiva and orc, and C data interface. - # Usage: - # docker-compose build debian-java - # docker-compose build debian-java-jni - # docker-compose run debian-java-jni - image: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni - build: - context: . - dockerfile: ci/docker/linux-apt-jni.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni - args: - llvm: ${LLVM} - shm_size: *shm-size - environment: - <<: *ccache - ARROW_BUILD_TESTS: "OFF" - ARROW_S3: "OFF" - ARROW_SUBSTRAIT: "OFF" - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated - command: - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/java_jni_build.sh /arrow /build /tmp/java_dist && - /arrow/ci/scripts/java_build.sh /arrow /build /tmp/java_dist && - /arrow/ci/scripts/java_test.sh /arrow /build /tmp/java_dist" - oracle-java: # Usage: # docker-compose build oracle-java