Skip to content

Commit

Permalink
ARROW-17451: [CI][Java] Use manylinux2014 image for JNI (apache#13920)
Browse files Browse the repository at this point in the history
This is because our official .jar packages are built in the manylinux2014 image.

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
kou authored and zagto committed Oct 7, 2022
1 parent 42943ab commit bd02afa
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 63 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/java_jni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ env:
jobs:

docker:
name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset)
name: AMD64 manylinux2014 Java JNI
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
Expand All @@ -61,23 +61,23 @@ jobs:
uses: actions/cache@v2
with:
path: .docker
key: maven-${{ hashFiles('java/**') }}
restore-keys: maven-
key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }}
restore-keys: java-jni-manylinux-2014-
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: 3.8
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
run: archery docker run debian-java-jni
run: archery docker run java-jni-manylinux-2014
- name: Docker Push
if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
continue-on-error: true
run: archery docker push debian-java-jni
run: archery docker push java-jni-manylinux-2014

docker_integration_python:
name: AMD64 Debian 9 Java C Data Interface Integration
name: AMD64 Conda Java C Data Interface Integration
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
Expand Down
10 changes: 9 additions & 1 deletion ci/docker/java-jni-manylinux-201x.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,13 @@ RUN vcpkg install \

# Install Java
ARG java=1.8.0
RUN yum install -y java-$java-openjdk-devel && yum clean all
RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all
ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/

# For ci/scripts/java_*.sh
ENV ARROW_GANDIVA_JAVA=ON \
ARROW_HOME=/tmp/local \
ARROW_JAVA_CDATA=ON \
ARROW_JNI=ON \
ARROW_PLASMA=ON \
ARROW_USE_CCACHE=ON
7 changes: 5 additions & 2 deletions ci/scripts/java_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@

set -ex

if [[ "${ARROW_JAVA_BUILD:-ON}" != "ON" ]]; then
exit
fi

arrow_dir=${1}
source_dir=${1}/java
build_dir=${2}
cpp_build_dir=${build_dir}/cpp/${ARROW_BUILD_TYPE:-debug}
java_jni_dist_dir=${3}

: ${BUILD_DOCS_JAVA:=OFF}
Expand Down Expand Up @@ -85,7 +88,7 @@ if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
fi

if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then
${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install
${mvn} -Darrow.cpp.build.dir=${java_jni_dist_dir} -Parrow-jni install
fi

if [ "${ARROW_PLASMA}" = "ON" ]; then
Expand Down
25 changes: 20 additions & 5 deletions ci/scripts/java_jni_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ echo "=== Clear output directories and leftovers ==="
rm -rf ${build_dir}

echo "=== Building Arrow C++ libraries ==="
install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_DATASET:=ON}
: ${ARROW_FILESYSTEM:=ON}
Expand All @@ -40,9 +41,15 @@ echo "=== Building Arrow C++ libraries ==="
: ${ARROW_PLASMA:=ON}
: ${ARROW_PYTHON:=OFF}
: ${ARROW_S3:=ON}
: ${ARROW_USE_CCACHE:=OFF}
: ${CMAKE_BUILD_TYPE:=Release}
: ${CMAKE_UNITY_BUILD:=ON}

if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
echo "=== ccache statistics before build ==="
ccache -s
fi

export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
export AWS_EC2_METADATA_DISABLED=TRUE
Expand Down Expand Up @@ -75,12 +82,13 @@ cmake \
-DARROW_S3=${ARROW_S3} \
-DARROW_SNAPPY_USE_SHARED=OFF \
-DARROW_THRIFT_USE_SHARED=OFF \
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
-DARROW_UTF8PROC_USE_SHARED=OFF \
-DARROW_ZSTD_USE_SHARED=OFF \
-DAWSSDK_SOURCE=BUNDLED \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \
-DCMAKE_INSTALL_PREFIX=${install_dir} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
Expand All @@ -102,12 +110,17 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \
${build_dir} \
${dist_dir}

if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
echo "=== ccache statistics after build ==="
ccache -s
fi

echo "=== Copying libraries to the distribution folder ==="
mkdir -p "${dist_dir}"
cp -L ${build_dir}/cpp/lib/libgandiva_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir}

echo "=== Checking shared dependencies for libraries ==="

Expand All @@ -122,9 +135,11 @@ archery linking check-dependencies \
--allow libcurl \
--allow libgandiva_jni \
--allow libncurses \
--allow libplasma_java \
--allow libz \
libarrow_cdata_jni.dylib \
libarrow_dataset_jni.dylib \
libarrow_orc_jni.dylib \
libgandiva_jni.dylib
libgandiva_jni.dylib \
libplasma_java.dylib
popd
26 changes: 20 additions & 6 deletions ci/scripts/java_jni_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ echo "=== Building Arrow C++ libraries ==="
devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
grep -o "^[0-9]*")
devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_DATASET:=ON}
: ${ARROW_GANDIVA:=ON}
: ${ARROW_GANDIVA_JAVA:=ON}
Expand All @@ -44,14 +45,19 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ
: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
: ${ARROW_PYTHON:=OFF}
: ${ARROW_S3:=ON}
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_USE_CCACHE:=OFF}
: ${CMAKE_BUILD_TYPE:=Release}
: ${CMAKE_UNITY_BUILD:=ON}
: ${VCPKG_ROOT:=/opt/vcpkg}
: ${VCPKG_FEATURE_FLAGS:=-manifests}
: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread}

if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
echo "=== ccache statistics before build ==="
ccache -s
fi

export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
export AWS_EC2_METADATA_DISABLED=TRUE
Expand Down Expand Up @@ -87,11 +93,12 @@ cmake \
-DARROW_S3=${ARROW_S3} \
-DARROW_SNAPPY_USE_SHARED=OFF \
-DARROW_THRIFT_USE_SHARED=OFF \
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
-DARROW_UTF8PROC_USE_SHARED=OFF \
-DARROW_ZSTD_USE_SHARED=OFF \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \
-DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
-DORC_SOURCE=BUNDLED \
-DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \
Expand Down Expand Up @@ -126,11 +133,17 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \
${build_dir} \
${dist_dir}

if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
echo "=== ccache statistics after build ==="
ccache -s
fi


echo "=== Copying libraries to the distribution folder ==="
cp -L ${build_dir}/cpp/lib/libgandiva_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir}

echo "=== Checking shared dependencies for libraries ==="

Expand All @@ -149,5 +162,6 @@ archery linking check-dependencies \
libarrow_cdata_jni.so \
libarrow_dataset_jni.so \
libarrow_orc_jni.so \
libgandiva_jni.so
libgandiva_jni.so \
libplasma_java.so
popd
9 changes: 6 additions & 3 deletions ci/scripts/java_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@

set -ex

if [[ "${ARROW_JAVA_TEST:-ON}" != "ON" ]]; then
exit
fi

arrow_dir=${1}
source_dir=${1}/java
cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
java_jni_dist_dir=${3}

# For JNI and Plasma tests
Expand All @@ -36,7 +39,7 @@ pushd ${source_dir}
${mvn} test

if [ "${ARROW_JNI}" = "ON" ]; then
${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir}
${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${java_jni_dist_dir}
fi

if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
Expand All @@ -46,7 +49,7 @@ fi
if [ "${ARROW_PLASMA}" = "ON" ]; then
pushd ${source_dir}/plasma
java -cp target/test-classes:target/classes \
-Djava.library.path=${cpp_build_dir} \
-Djava.library.path=${java_jni_dist_dir} \
org.apache.arrow.plasma.PlasmaClientTest
popd
fi
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/plasma/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,14 @@ if(ARROW_PLASMA_JAVA_CLIENT)

if(APPLE)
target_link_libraries(plasma_java
plasma_shared
${PLASMA_LINK_LIBS}
plasma_static
${PLASMA_STATIC_LINK_LIBS}
"-undefined dynamic_lookup"
${PTHREAD_LIBRARY})
else(APPLE)
target_link_libraries(plasma_java plasma_shared ${PLASMA_LINK_LIBS}
else()
target_link_libraries(plasma_java plasma_static ${PLASMA_STATIC_LINK_LIBS}
${PTHREAD_LIBRARY})
endif(APPLE)
endif()
endif()
#
# Unit tests
Expand Down
11 changes: 10 additions & 1 deletion dev/tasks/java-jars/github.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ jobs:
{{ macros.github_checkout_arrow()|indent }}
{{ macros.github_install_archery()|indent }}
- name: Build C++ Libs
run: archery docker run java-jni-manylinux-2014
run: |
archery docker run \
-e ARROW_JAVA_BUILD=OFF \
-e ARROW_JAVA_TEST=OFF \
java-jni-manylinux-2014
- name: Compress into single artifact to keep directory structure
run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/
- name: Upload Artifacts
Expand Down Expand Up @@ -61,6 +65,9 @@ jobs:
# aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
# aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
brew uninstall aws-sdk-cpp
- name: Setup ccache
run: |
arrow/ci/scripts/ccache_setup.sh
- name: Build C++ Libs
run: |
set -e
Expand Down Expand Up @@ -100,11 +107,13 @@ jobs:
test -f arrow/java-dist/libarrow_cdata_jni.dylib
test -f arrow/java-dist/libarrow_dataset_jni.dylib
test -f arrow/java-dist/libgandiva_jni.dylib
test -f arrow/java-dist/libplasma_java.dylib
test -f arrow/java-dist/libarrow_orc_jni.dylib
test -f arrow/java-dist/libarrow_cdata_jni.so
test -f arrow/java-dist/libarrow_dataset_jni.so
test -f arrow/java-dist/libarrow_orc_jni.so
test -f arrow/java-dist/libgandiva_jni.so
test -f arrow/java-dist/libplasma_java.so
- name: Build Bundled Jar
run: |
set -e
Expand Down
40 changes: 6 additions & 34 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ x-hierarchy:
- debian-go-cgo
- debian-go-cgo-python
- debian-java
- debian-java-jni
- debian-js
- fedora-cpp:
- fedora-python
Expand Down Expand Up @@ -999,10 +998,14 @@ services:
<<: *ccache
volumes:
- .:/arrow:delegated
- ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
- ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
command:
["pip install -e /arrow/dev/archery &&
/arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist"]
["pip install -e /arrow/dev/archery && \
/arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \
source /opt/rh/rh-maven35/enable && \
/arrow/ci/scripts/java_build.sh /arrow /build /arrow/java-dist && \
/arrow/ci/scripts/java_test.sh /arrow /build /arrow/java-dist"]

############################## Integration #################################

Expand Down Expand Up @@ -1541,37 +1544,6 @@ services:
/arrow/ci/scripts/java_build.sh /arrow /build &&
/arrow/ci/scripts/java_test.sh /arrow /build"

debian-java-jni:
# Includes plasma test, jni for gandiva and orc, and C data interface.
# Usage:
# docker-compose build debian-java
# docker-compose build debian-java-jni
# docker-compose run debian-java-jni
image: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni
build:
context: .
dockerfile: ci/docker/linux-apt-jni.dockerfile
cache_from:
- ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni
args:
llvm: ${LLVM}
shm_size: *shm-size
environment:
<<: *ccache
ARROW_BUILD_TESTS: "OFF"
ARROW_S3: "OFF"
ARROW_SUBSTRAIT: "OFF"
volumes:
- .:/arrow:delegated
- ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
- ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated
command:
/bin/bash -c "
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/java_jni_build.sh /arrow /build /tmp/java_dist &&
/arrow/ci/scripts/java_build.sh /arrow /build /tmp/java_dist &&
/arrow/ci/scripts/java_test.sh /arrow /build /tmp/java_dist"

oracle-java:
# Usage:
# docker-compose build oracle-java
Expand Down

0 comments on commit bd02afa

Please sign in to comment.