Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17451: [CI][Java] Use manylinux2014 image for JNI #13920

Merged
merged 6 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/java_jni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ env:
jobs:

docker:
name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset)
name: AMD64 manylinux2014 Java JNI
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
Expand All @@ -70,14 +70,14 @@ jobs:
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
run: archery docker run debian-java-jni
run: archery docker run java-jni-manylinux-2014
- name: Docker Push
if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
continue-on-error: true
run: archery docker push debian-java-jni
run: archery docker push java-jni-manylinux-2014

docker_integration_python:
name: AMD64 Debian 9 Java C Data Interface Integration
name: AMD64 Conda Java C Data Interface Integration
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
Expand Down
9 changes: 8 additions & 1 deletion ci/docker/java-jni-manylinux-201x.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,12 @@ RUN vcpkg install \

# Install Java
ARG java=1.8.0
RUN yum install -y java-$java-openjdk-devel && yum clean all
RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all
ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/

# For ci/scripts/java_*.sh
ENV ARROW_GANDIVA_JAVA=ON \
ARROW_HOME=/tmp/local \
ARROW_JAVA_CDATA=ON \
ARROW_JNI=ON \
ARROW_PLASMA=ON
7 changes: 5 additions & 2 deletions ci/scripts/java_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@

set -ex

if [[ "${ARROW_JAVA_BUILD:-ON}" != "ON" ]]; then
exit
fi

arrow_dir=${1}
source_dir=${1}/java
build_dir=${2}
cpp_build_dir=${build_dir}/cpp/${ARROW_BUILD_TYPE:-debug}
java_jni_dist_dir=${3}

: ${BUILD_DOCS_JAVA:=OFF}
Expand Down Expand Up @@ -80,7 +83,7 @@ if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
fi

if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then
${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install
${mvn} -Darrow.cpp.build.dir=${java_jni_dist_dir} -Parrow-jni install
fi

if [ "${ARROW_PLASMA}" = "ON" ]; then
Expand Down
13 changes: 8 additions & 5 deletions ci/scripts/java_jni_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ echo "=== Clear output directories and leftovers ==="
rm -rf ${build_dir}

echo "=== Building Arrow C++ libraries ==="
install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=OFF}
: ${ARROW_DATASET:=ON}
: ${ARROW_FILESYSTEM:=ON}
Expand Down Expand Up @@ -80,7 +81,7 @@ cmake \
-DAWSSDK_SOURCE=BUNDLED \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \
-DCMAKE_INSTALL_PREFIX=${install_dir} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
Expand All @@ -105,9 +106,10 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \

echo "=== Copying libraries to the distribution folder ==="
mkdir -p "${dist_dir}"
cp -L ${build_dir}/cpp/lib/libgandiva_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir}
cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir}

echo "=== Checking shared dependencies for libraries ==="

Expand All @@ -126,5 +128,6 @@ archery linking check-dependencies \
libarrow_cdata_jni.dylib \
libarrow_dataset_jni.dylib \
libarrow_orc_jni.dylib \
libgandiva_jni.dylib
libgandiva_jni.dylib \
libplasma_java.dylib
popd
12 changes: 7 additions & 5 deletions ci/scripts/java_jni_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ cmake \
-DARROW_ZSTD_USE_SHARED=OFF \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \
-DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
-DORC_SOURCE=BUNDLED \
-DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \
Expand Down Expand Up @@ -128,9 +128,10 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \


echo "=== Copying libraries to the distribution folder ==="
cp -L ${build_dir}/cpp/lib/libgandiva_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir}
cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir}
cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir}

echo "=== Checking shared dependencies for libraries ==="

Expand All @@ -149,5 +150,6 @@ archery linking check-dependencies \
libarrow_cdata_jni.so \
libarrow_dataset_jni.so \
libarrow_orc_jni.so \
libgandiva_jni.so
libgandiva_jni.so \
libplasma_java.so
popd
9 changes: 6 additions & 3 deletions ci/scripts/java_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@

set -ex

if [[ "${ARROW_JAVA_TEST:-ON}" != "ON" ]]; then
exit
fi

arrow_dir=${1}
source_dir=${1}/java
cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
java_jni_dist_dir=${3}

# For JNI and Plasma tests
Expand All @@ -36,7 +39,7 @@ pushd ${source_dir}
${mvn} test

if [ "${ARROW_JNI}" = "ON" ]; then
${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir}
${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${java_jni_dist_dir}
fi

if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
Expand All @@ -46,7 +49,7 @@ fi
if [ "${ARROW_PLASMA}" = "ON" ]; then
pushd ${source_dir}/plasma
java -cp target/test-classes:target/classes \
-Djava.library.path=${cpp_build_dir} \
-Djava.library.path=${java_jni_dist_dir} \
org.apache.arrow.plasma.PlasmaClientTest
popd
fi
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/plasma/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,14 @@ if(ARROW_PLASMA_JAVA_CLIENT)

if(APPLE)
target_link_libraries(plasma_java
plasma_shared
${PLASMA_LINK_LIBS}
plasma_static
${PLASMA_STATIC_LINK_LIBS}
"-undefined dynamic_lookup"
${PTHREAD_LIBRARY})
else(APPLE)
target_link_libraries(plasma_java plasma_shared ${PLASMA_LINK_LIBS}
else()
target_link_libraries(plasma_java plasma_static ${PLASMA_STATIC_LINK_LIBS}
${PTHREAD_LIBRARY})
endif(APPLE)
endif()
endif()
#
# Unit tests
Expand Down
8 changes: 7 additions & 1 deletion dev/tasks/java-jars/github.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ jobs:
{{ macros.github_checkout_arrow()|indent }}
{{ macros.github_install_archery()|indent }}
- name: Build C++ Libs
run: archery docker run java-jni-manylinux-2014
run: |
archery docker run \
-e ARROW_JAVA_BUILD=OFF \
-e ARROW_JAVA_TEST=OFF \
java-jni-manylinux-2014
- name: Compress into single artifact to keep directory structure
run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/
- name: Upload Artifacts
Expand Down Expand Up @@ -100,11 +104,13 @@ jobs:
# Verify that every JNI shared library produced by the macOS and Linux build
# jobs is present in the merged distribution directory before bundling.
# macOS (.dylib) artifacts:
test -f arrow/java-dist/libarrow_cdata_jni.dylib
test -f arrow/java-dist/libarrow_dataset_jni.dylib
test -f arrow/java-dist/libgandiva_jni.dylib
# The Plasma library is named libplasma_java (not libplasma_jni): that is the
# file name the JNI build scripts copy into the distribution directory.
test -f arrow/java-dist/libplasma_java.dylib
test -f arrow/java-dist/libarrow_orc_jni.dylib
# Linux (.so) artifacts:
test -f arrow/java-dist/libarrow_cdata_jni.so
test -f arrow/java-dist/libarrow_dataset_jni.so
test -f arrow/java-dist/libarrow_orc_jni.so
test -f arrow/java-dist/libgandiva_jni.so
test -f arrow/java-dist/libplasma_java.so
- name: Build Bundled Jar
run: |
set -e
Expand Down
40 changes: 6 additions & 34 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ x-hierarchy:
- debian-go-cgo
- debian-go-cgo-python
- debian-java
- debian-java-jni
- debian-js
- fedora-cpp:
- fedora-python
Expand Down Expand Up @@ -999,10 +998,14 @@ services:
<<: *ccache
volumes:
- .:/arrow:delegated
- ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
- ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
command:
["pip install -e /arrow/dev/archery &&
/arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist"]
["pip install -e /arrow/dev/archery && \
/arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \
source /opt/rh/rh-maven35/enable && \
/arrow/ci/scripts/java_build.sh /arrow /build /arrow/java-dist && \
/arrow/ci/scripts/java_test.sh /arrow /build /arrow/java-dist"]

############################## Integration #################################

Expand Down Expand Up @@ -1541,37 +1544,6 @@ services:
/arrow/ci/scripts/java_build.sh /arrow /build &&
/arrow/ci/scripts/java_test.sh /arrow /build"

debian-java-jni:
# Includes plasma test, jni for gandiva and orc, and C data interface.
# Usage:
# docker-compose build debian-java
# docker-compose build debian-java-jni
# docker-compose run debian-java-jni
image: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni
build:
context: .
dockerfile: ci/docker/linux-apt-jni.dockerfile
cache_from:
- ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni
args:
llvm: ${LLVM}
shm_size: *shm-size
environment:
<<: *ccache
ARROW_BUILD_TESTS: "OFF"
ARROW_S3: "OFF"
ARROW_SUBSTRAIT: "OFF"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you reuse these environment variables in the java-manylinux build?

Copy link
Member Author

@kou kou Aug 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use ARROW_BUILD_TESTS=OFF and ARROW_SUBSTRAIT=OFF, and both are already set.
(But I want to enable ARROW_BUILD_TESTS in ARROW-17081 / #13911 to run a Dataset JNI test that uses arrow_testing.)

We can't use ARROW_S3=OFF because our JNI packages recently started enabling S3: ARROW-16584 / #13157

volumes:
- .:/arrow:delegated
- ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
- ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated
command:
/bin/bash -c "
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/java_jni_build.sh /arrow /build /tmp/java_dist &&
/arrow/ci/scripts/java_build.sh /arrow /build /tmp/java_dist &&
/arrow/ci/scripts/java_test.sh /arrow /build /tmp/java_dist"

oracle-java:
# Usage:
# docker-compose build oracle-java
Expand Down