diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index de953fd5ae057..c77ec63df74f4 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -24,6 +24,7 @@ RUN vcpkg install \ --clean-after-build \ --x-install-root=${VCPKG_ROOT}/installed \ --x-manifest-root=/arrow/ci/vcpkg \ + --x-feature=dev \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ @@ -36,7 +37,7 @@ ARG java=1.8.0 RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/ -# For ci/scripts/java_*.sh +# For ci/scripts/{cpp,java}_*.sh ENV ARROW_GANDIVA_JAVA=ON \ ARROW_HOME=/tmp/local \ ARROW_JAVA_CDATA=ON \ diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 0f19e61413305..c68b52d77ef51 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -20,9 +20,10 @@ set -ex arrow_dir=${1} -build_dir=${2}/java_jni +arrow_install_dir=${2} +build_dir=${3}/java_jni # The directory where the final binaries will be stored when scripts finish -dist_dir=${3} +dist_dir=${4} echo "=== Clear output directories and leftovers ===" # Clear output directories and leftovers @@ -32,11 +33,37 @@ echo "=== Building Arrow Java C Data Interface native library ===" mkdir -p "${build_dir}" pushd "${build_dir}" +case "$(uname)" in + Linux) + n_jobs=$(nproc) + ;; + Darwin) + n_jobs=$(sysctl -n hw.ncpu) + ;; + *) + n_jobs=${NPROC:-1} + ;; +esac + +: ${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}} +: ${CMAKE_BUILD_TYPE:=release} cmake \ - -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \ + -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ + -DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_PREFIX_PATH=${arrow_install_dir} \ -DCMAKE_INSTALL_PREFIX=${dist_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -GNinja \ ${JAVA_JNI_CMAKE_ARGS:-} \ ${arrow_dir}/java -cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release} +export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} +cmake --build . --config ${CMAKE_BUILD_TYPE} +if [ "${ARROW_JAVA_BUILD_TESTS}" = "ON" ]; then + ctest \ + --output-on-failure \ + --parallel ${n_jobs} \ + --timeout 300 +fi +cmake --build . --config ${CMAKE_BUILD_TYPE} --target install popd diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 5418daaf0113b..342bc2d118845 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -30,7 +30,7 @@ rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" install_dir=${build_dir}/cpp-install -: ${ARROW_BUILD_TESTS:=OFF} +: ${ARROW_BUILD_TESTS:=ON} : ${ARROW_DATASET:=ON} : ${ARROW_FILESYSTEM:=ON} : ${ARROW_GANDIVA_JAVA:=ON} @@ -39,7 +39,6 @@ install_dir=${build_dir}/cpp-install : ${ARROW_PARQUET:=ON} : ${ARROW_PLASMA_JAVA_CLIENT:=ON} : ${ARROW_PLASMA:=ON} -: ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} : ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=Release} @@ -58,33 +57,23 @@ mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" cmake \ - -DARROW_BOOST_USE_SHARED=OFF \ - -DARROW_BROTLI_USE_SHARED=OFF \ + -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ -DARROW_BUILD_UTILITIES=OFF \ - -DARROW_BZ2_USE_SHARED=OFF \ + -DARROW_CSV=${ARROW_DATASET} \ -DARROW_DATASET=${ARROW_DATASET} \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \ -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \ - -DARROW_GFLAGS_USE_SHARED=OFF \ - -DARROW_GRPC_USE_SHARED=OFF \ -DARROW_JNI=ON \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_OPENSSL_USE_SHARED=OFF \ -DARROW_ORC=${ARROW_ORC} \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA=${ARROW_PLASMA} \ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ - -DARROW_PYTHON=${ARROW_PYTHON} \ -DARROW_S3=${ARROW_S3} \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_THRIFT_USE_SHARED=OFF \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_ZSTD_USE_SHARED=OFF \ -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ @@ -99,7 +88,16 @@ cmake \ cmake --build . --target install if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then - ctest + # MinIO is required + exclude_tests="arrow-s3fs-test" + # unstable + exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test" + ctest \ + --exclude-regex "${exclude_tests}" \ + --label-regex unittest \ + --output-on-failure \ + --parallel $(sysctl -n hw.ncpu) \ + --timeout 300 fi popd @@ -107,6 +105,7 @@ popd ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${arrow_dir} \ + ${install_dir} \ ${build_dir} \ ${dist_dir} @@ -117,7 +116,6 @@ fi echo "=== Copying libraries to the distribution folder ===" mkdir -p "${dist_dir}" -cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir} cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir} cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir} cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir} diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 331d74b34a1f4..6669c4fdaa6d2 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -32,7 +32,7 @@ echo "=== Building Arrow C++ libraries ===" devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ grep -o "^[0-9]*") devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" -: ${ARROW_BUILD_TESTS:=OFF} +: ${ARROW_BUILD_TESTS:=ON} : ${ARROW_DATASET:=ON} : ${ARROW_GANDIVA:=ON} : ${ARROW_GANDIVA_JAVA:=ON} @@ -43,10 +43,9 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${ARROW_PARQUET:=ON} : ${ARROW_PLASMA:=ON} : ${ARROW_PLASMA_JAVA_CLIENT:=ON} -: ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} : ${ARROW_USE_CCACHE:=OFF} -: ${CMAKE_BUILD_TYPE:=Release} +: ${CMAKE_BUILD_TYPE:=release} : ${CMAKE_UNITY_BUILD:=ON} : ${VCPKG_ROOT:=/opt/vcpkg} : ${VCPKG_FEATURE_FLAGS:=-manifests} @@ -66,36 +65,26 @@ mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" cmake \ - -DARROW_BOOST_USE_SHARED=OFF \ - -DARROW_BROTLI_USE_SHARED=OFF \ - -DARROW_BUILD_SHARED=ON \ - -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ + -DARROW_BUILD_SHARED=OFF \ + -DARROW_BUILD_TESTS=ON \ -DARROW_BUILD_UTILITIES=OFF \ - -DARROW_BZ2_USE_SHARED=OFF \ + -DARROW_CSV=${ARROW_DATASET} \ -DARROW_DATASET=${ARROW_DATASET} \ -DARROW_DEPENDENCY_SOURCE="VCPKG" \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \ -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ - -DARROW_GRPC_USE_SHARED=OFF \ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ -DARROW_JNI=ON \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_OPENSSL_USE_SHARED=OFF \ -DARROW_ORC=${ARROW_ORC} \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ - -DARROW_PYTHON=${ARROW_PYTHON} \ -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \ -DARROW_S3=${ARROW_S3} \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_THRIFT_USE_SHARED=OFF \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_ZSTD_USE_SHARED=OFF \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ @@ -105,16 +94,22 @@ cmake \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ - -DPythonInterp_FIND_VERSION_MAJOR=3 \ - -DPythonInterp_FIND_VERSION=ON \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ -GNinja \ ${arrow_dir}/cpp ninja install -if [ $ARROW_BUILD_TESTS = "ON" ]; then +if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then + # MinIO is required + exclude_tests="arrow-s3fs-test" + # unstable + exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test" + exclude_tests="${exclude_tests}|arrow-dataset-scanner-test" + # strptime + exclude_tests="${exclude_tests}|arrow-utility-test" ctest \ + --exclude-regex "${exclude_tests}" \ --label-regex unittest \ --output-on-failure \ --parallel $(nproc) \ @@ -125,11 +120,12 @@ popd JAVA_JNI_CMAKE_ARGS="" -JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_MANIFEST_MODE=OFF" +JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}" export JAVA_JNI_CMAKE_ARGS ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${arrow_dir} \ + ${ARROW_HOME} \ ${build_dir} \ ${dist_dir} @@ -140,7 +136,6 @@ fi echo "=== Copying libraries to the distribution folder ===" -cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir} cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir} cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir} cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir} diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index d9d074e99b073..71c23165e61f0 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -43,6 +43,7 @@ "description": "Development dependencies", "dependencies": [ "benchmark", + "boost-process", "gtest" ] }, diff --git a/cpp/Brewfile b/cpp/Brewfile index 9cffd8e3a8184..61fb619dc66bd 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -26,6 +26,7 @@ brew "cmake" brew "flatbuffers" brew "git" brew "glog" +brew "googletest" brew "grpc" brew "llvm" brew "llvm@12" @@ -39,4 +40,5 @@ brew "rapidjson" brew "snappy" brew "thrift" brew "wget" +brew "xsimd" brew "zstd" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4c0d8f1e91bc2..6a01f18e6bb74 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -786,6 +786,19 @@ endif() if(ARROW_S3) list(APPEND ARROW_SHARED_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) list(APPEND ARROW_STATIC_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) + if(AWSSDK_SOURCE STREQUAL "SYSTEM") + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + aws-cpp-sdk-identity-management + aws-cpp-sdk-sts + aws-cpp-sdk-cognito-identity + aws-cpp-sdk-s3 + aws-cpp-sdk-core) + elseif(AWSSDK_SOURCE STREQUAL "BUNDLED") + if(UNIX) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) + endif() + endif() endif() if(ARROW_WITH_OPENTELEMETRY) @@ -851,6 +864,9 @@ add_dependencies(arrow_test_dependencies toolchain-tests) if(ARROW_STATIC_LINK_LIBS) add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS}) if(ARROW_HDFS OR ARROW_ORC) + if(Protobuf_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF}) + endif() if(NOT MSVC_TOOLCHAIN) list(APPEND ARROW_STATIC_LINK_LIBS ${CMAKE_DL_LIBS}) list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) @@ -977,9 +993,6 @@ if(ARROW_JNI) if(ARROW_ORC) add_subdirectory(../java/adapter/orc/src/main/cpp ./java/orc/jni) endif() - if(ARROW_DATASET) - add_subdirectory(../java/dataset/src/main/cpp ./java/dataset/jni) - endif() endif() if(ARROW_GANDIVA) diff --git a/cpp/cmake_modules/FindAWSSDKAlt.cmake b/cpp/cmake_modules/FindAWSSDKAlt.cmake new file mode 100644 index 0000000000000..611184aa1d172 --- /dev/null +++ b/cpp/cmake_modules/FindAWSSDKAlt.cmake @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(find_package_args) +if(AWSSDKAlt_FIND_VERSION) + list(APPEND find_package_args ${AWSSDKAlt_FIND_VERSION}) +endif() +if(AWSSDKAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +# See https://aws.amazon.com/blogs/developer/developer-experience-of-the-aws-sdk-for-c-now-simplified-by-cmake/ +# Workaround to force AWS CMake configuration to look for shared libraries +if(DEFINED ENV{CONDA_PREFIX}) + if(DEFINED BUILD_SHARED_LIBS) + set(BUILD_SHARED_LIBS_WAS_SET TRUE) + set(BUILD_SHARED_LIBS_KEEP ${BUILD_SHARED_LIBS}) + else() + set(BUILD_SHARED_LIBS_WAS_SET FALSE) + endif() + set(BUILD_SHARED_LIBS ON) +endif() +find_package(AWSSDK ${find_package_args} + COMPONENTS config + s3 + transfer + identity-management + sts) +# Restore previous value of BUILD_SHARED_LIBS +if(DEFINED ENV{CONDA_PREFIX}) + if(BUILD_SHARED_LIBS_WAS_SET) + set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_KEEP}) + else() + unset(BUILD_SHARED_LIBS) + endif() +endif() +set(AWSSDKAlt_FOUND ${AWSSDK_FOUND}) diff --git a/cpp/cmake_modules/FindProtobufAlt.cmake b/cpp/cmake_modules/FindProtobufAlt.cmake new file mode 100644 index 0000000000000..d29f757aeb659 --- /dev/null +++ b/cpp/cmake_modules/FindProtobufAlt.cmake @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(ARROW_PROTOBUF_USE_SHARED) + set(Protobuf_USE_STATIC_LIBS OFF) +else() + set(Protobuf_USE_STATIC_LIBS ON) +endif() + +set(find_package_args) +if(ProtobufAlt_FIND_VERSION) + list(APPEND find_package_args ${ProtobufAlt_FIND_VERSION}) +endif() +if(ProtobufAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +find_package(Protobuf ${find_package_args}) +set(ProtobufAlt_FOUND ${Protobuf_FOUND}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 7c3e3a533229d..515cdfe8ef494 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -40,12 +40,6 @@ set(ARROW_RE2_LINKAGE "static" CACHE STRING "How to link the re2 library. static|shared (default static)") -if(ARROW_PROTOBUF_USE_SHARED) - set(Protobuf_USE_STATIC_LIBS OFF) -else() - set(Protobuf_USE_STATIC_LIBS ON) -endif() - # ---------------------------------------------------------------------- # Resolve the dependencies @@ -1640,6 +1634,8 @@ if(ARROW_WITH_PROTOBUF) set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") endif() resolve_dependency(Protobuf + HAVE_ALT + TRUE REQUIRED_VERSION ${ARROW_PROTOBUF_REQUIRED_VERSION} PC_PACKAGE_NAMES @@ -4746,49 +4742,7 @@ macro(build_awssdk) endmacro() if(ARROW_S3) - # See https://aws.amazon.com/blogs/developer/developer-experience-of-the-aws-sdk-for-c-now-simplified-by-cmake/ - - # Workaround to force AWS CMake configuration to look for shared libraries - if(DEFINED ENV{CONDA_PREFIX}) - if(DEFINED BUILD_SHARED_LIBS) - set(BUILD_SHARED_LIBS_WAS_SET TRUE) - set(BUILD_SHARED_LIBS_VALUE ${BUILD_SHARED_LIBS}) - else() - set(BUILD_SHARED_LIBS_WAS_SET FALSE) - endif() - set(BUILD_SHARED_LIBS "ON") - endif() - - # Need to customize the find_package() call, so cannot call resolve_dependency() - if(AWSSDK_SOURCE STREQUAL "AUTO") - find_package(AWSSDK - COMPONENTS config - s3 - transfer - identity-management - sts) - if(NOT AWSSDK_FOUND) - build_awssdk() - endif() - elseif(AWSSDK_SOURCE STREQUAL "BUNDLED") - build_awssdk() - elseif(AWSSDK_SOURCE STREQUAL "SYSTEM") - find_package(AWSSDK REQUIRED - COMPONENTS config - s3 - transfer - identity-management - sts) - endif() - - # Restore previous value of BUILD_SHARED_LIBS - if(DEFINED ENV{CONDA_PREFIX}) - if(BUILD_SHARED_LIBS_WAS_SET) - set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_VALUE}) - else() - unset(BUILD_SHARED_LIBS) - endif() - endif() + resolve_dependency(AWSSDK HAVE_ALT TRUE) message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}") message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}") diff --git a/cpp/src/arrow/ArrowConfig.cmake.in b/cpp/src/arrow/ArrowConfig.cmake.in index 0aa298b665844..f0aa1bc959b0e 100644 --- a/cpp/src/arrow/ArrowConfig.cmake.in +++ b/cpp/src/arrow/ArrowConfig.cmake.in @@ -102,6 +102,16 @@ if(TARGET Arrow::arrow_static AND NOT TARGET Arrow::arrow_bundled_dependencies) PROPERTIES IMPORTED_LOCATION "${arrow_lib_dir}/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_bundled_dependencies${CMAKE_STATIC_LIBRARY_SUFFIX}" ) + + # CMP0057: Support new if() IN_LIST operator. + # https://cmake.org/cmake/help/latest/policy/CMP0057.html + cmake_policy(PUSH) + cmake_policy(SET CMP0057 NEW) + if(APPLE AND "AWS::aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) + find_library(CORE_FOUNDATION CoreFoundation) + target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${CORE_FOUNDATION}) + endif() + cmake_policy(POP) endif() macro(arrow_keep_backward_compatibility namespace target_base_name) diff --git a/cpp/src/arrow/dataset/api.h b/cpp/src/arrow/dataset/api.h index 8b81f4c15d171..6e8aab5e9ea8c 100644 --- a/cpp/src/arrow/dataset/api.h +++ b/cpp/src/arrow/dataset/api.h @@ -23,8 +23,14 @@ #include "arrow/dataset/dataset.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" +#ifdef ARROW_CSV #include "arrow/dataset/file_csv.h" +#endif #include "arrow/dataset/file_ipc.h" +#ifdef ARROW_ORC #include "arrow/dataset/file_orc.h" +#endif +#ifdef ARROW_PARQUET #include "arrow/dataset/file_parquet.h" +#endif #include "arrow/dataset/scanner.h" diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h index 0943037aef0ef..093fdc7ca4577 100644 --- a/cpp/src/arrow/filesystem/s3_internal.h +++ b/cpp/src/arrow/filesystem/s3_internal.h @@ -43,7 +43,7 @@ namespace internal { enum class S3Backend { Amazon, Minio, Other }; // Detect the S3 backend type from the S3 server's response headers -S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { +inline S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { const auto it = headers.find("server"); if (it != headers.end()) { const auto& value = util::string_view(it->second); @@ -58,7 +58,7 @@ S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { } template -S3Backend DetectS3Backend(const Aws::Client::AWSError& error) { +inline S3Backend DetectS3Backend(const Aws::Client::AWSError& error) { return DetectS3Backend(error.GetResponseHeaders()); } diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc b/cpp/src/arrow/filesystem/s3_test_util.cc index 1aafb5ec66c03..f5a054a8efa3c 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.cc +++ b/cpp/src/arrow/filesystem/s3_test_util.cc @@ -31,7 +31,9 @@ // includes windows.h. boost/process/args.hpp is included before // boost/process/async.h that includes // boost/asio/detail/socket_types.hpp implicitly is included. +#ifdef __MINGW32__ #include +#endif // We need BOOST_USE_WINDOWS_H definition with MinGW when we use // boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in // cpp/cmake_modules/ThirdpartyToolchain.cmake for details. diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index c987a0cae367e..9948c1e358711 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -46,6 +46,8 @@ #cmakedefine ARROW_JEMALLOC #cmakedefine ARROW_JEMALLOC_VENDORED #cmakedefine ARROW_JSON +#cmakedefine ARROW_ORC +#cmakedefine ARROW_PARQUET #cmakedefine ARROW_GCS #cmakedefine ARROW_S3 diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt index 046934141f6d0..b89356121dcb0 100644 --- a/cpp/src/gandiva/jni/CMakeLists.txt +++ b/cpp/src/gandiva/jni/CMakeLists.txt @@ -76,9 +76,11 @@ add_arrow_lib(gandiva_jni ${GANDIVA_JNI_SOURCES} OUTPUTS GANDIVA_JNI_LIBRARIES - SHARED_PRIVATE_LINK_LIBS - ${GANDIVA_LINK_LIBS} - STATIC_LINK_LIBS + BUILD_SHARED + ON + BUILD_STATIC + OFF + SHARED_LINK_LIBS ${GANDIVA_LINK_LIBS} DEPENDENCIES ${GANDIVA_LINK_LIBS} diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 23b97087c394b..f94a43a8b44cd 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -22,12 +22,12 @@ jobs: build-cpp-ubuntu: - name: Build C++ Libs Ubuntu + name: Build C++ libraries Ubuntu runs-on: ubuntu-latest steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} - - name: Build C++ Libs + - name: Build C++ libraries run: | archery docker run \ -e ARROW_JAVA_BUILD=OFF \ @@ -35,27 +35,27 @@ jobs: java-jni-manylinux-2014 - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/ - - name: Upload Artifacts + - name: Upload artifacts uses: actions/upload-artifact@v2 with: name: ubuntu-shared-lib path: arrow-shared-libs-linux.tar.gz {% if arrow.branch == 'master' %} {{ macros.github_login_dockerhub()|indent }} - - name: Push Docker Image + - name: Push Docker image shell: bash run: archery docker push java-jni-manylinux-2014 {% endif %} build-cpp-macos: - name: Build C++ Libs MacOS + name: Build C++ libraries macOS runs-on: macos-latest env: MACOSX_DEPLOYMENT_TARGET: "10.13" steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} - - name: Install Dependencies + - name: Install dependencies run: | brew install --overwrite git brew bundle --file=arrow/cpp/Brewfile @@ -68,7 +68,7 @@ jobs: - name: Setup ccache run: | arrow/ci/scripts/ccache_setup.sh - - name: Build C++ Libs + - name: Build C++ libraries run: | set -e arrow/ci/scripts/java_jni_macos_build.sh \ @@ -77,14 +77,14 @@ jobs: $GITHUB_WORKSPACE/arrow/java-dist - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/java-dist/ - - name: Upload Artifacts + - name: Upload artifacts uses: actions/upload-artifact@v2 with: name: macos-shared-lib path: arrow-shared-libs-macos.tar.gz package-jars: - name: Build Jar Files + name: Build jar files runs-on: macos-latest needs: [build-cpp-macos, build-cpp-ubuntu] steps: @@ -93,7 +93,7 @@ jobs: uses: actions/download-artifact@v2 with: name: ubuntu-shared-lib - - name: Download MacOS C++ Library + - name: Download macOS C++ libraries uses: actions/download-artifact@v2 with: name: macos-shared-lib @@ -101,7 +101,7 @@ jobs: run: | tar -xvzf arrow-shared-libs-linux.tar.gz tar -xvzf arrow-shared-libs-macos.tar.gz - - name: Test that Shared Libraries Exist + - name: Test that shared libraries exist run: | set -x test -f arrow/java-dist/libarrow_cdata_jni.dylib @@ -114,7 +114,7 @@ jobs: test -f arrow/java-dist/libarrow_orc_jni.so test -f arrow/java-dist/libgandiva_jni.so test -f arrow/java-dist/libplasma_java.so - - name: Build Bundled Jar + - name: Build bundled jar run: | set -e pushd arrow/java diff --git a/docker-compose.yml b/docker-compose.yml index 751a81fa5540a..67dfd87512e9a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1167,7 +1167,7 @@ services: command: [ "/arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_jni_build.sh /arrow /build /tmp/dist/java && + /arrow/ci/scripts/java_jni_build.sh /arrow $${ARROW_HOME} /build /tmp/dist/java && /arrow/ci/scripts/java_build.sh /arrow /build /tmp/dist/java && /arrow/ci/scripts/java_cdata_integration.sh /arrow /tmp/dist/java" ] diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index add2b11b27807..b45afa70a9d03 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -75,78 +75,70 @@ We can build these manually or we can use `Archery`_ to build them using a Docke |__ libarrow_dataset_jni.so |__ libarrow_orc_jni.so |__ libgandiva_jni.so + |__ libplasma_java.so Building JNI Libraries on MacOS ------------------------------- Note: If you are building on Apple Silicon, be sure to use a JDK version that was compiled for that architecture. See, for example, the `Azul JDK `_. -To build only the C Data Interface library: +First, you need to build Apache Arrow C++: .. code-block:: $ cd arrow $ brew bundle --file=cpp/Brewfile Homebrew Bundle complete! 25 Brewfile dependencies now installed. + $ brew uninstall aws-sdk-cpp + (We can't use aws-sdk-cpp installed by Homebrew because it has + an issue: https://github.com/aws/aws-sdk-cpp/issues/1809 ) $ export JAVA_HOME= - $ mkdir -p java-dist java-native-c - $ cd java-native-c + $ mkdir -p java-dist cpp-jni $ cmake \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=../java-dist/lib \ - ../java - $ cmake --build . --target install - $ ls -latr ../java-dist/lib - |__ libarrow_cdata_jni.dylib - -To build other JNI libraries: - -.. code-block:: - - $ cd arrow - $ brew bundle --file=cpp/Brewfile - Homebrew Bundle complete! 25 Brewfile dependencies now installed. - $ export JAVA_HOME= - $ mkdir -p java-dist java-native-cpp - $ cd java-native-cpp - $ cmake \ - -DARROW_BOOST_USE_SHARED=OFF \ - -DARROW_BROTLI_USE_SHARED=OFF \ - -DARROW_BZ2_USE_SHARED=OFF \ - -DARROW_GFLAGS_USE_SHARED=OFF \ - -DARROW_GRPC_USE_SHARED=OFF \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_OPENSSL_USE_SHARED=OFF \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_THRIFT_USE_SHARED=OFF \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_ZSTD_USE_SHARED=OFF \ - -DARROW_JNI=ON \ - -DARROW_PARQUET=ON \ - -DARROW_FILESYSTEM=ON \ + -S cpp \ + -B cpp-jni \ + -DARROW_CSV=ON \ -DARROW_DATASET=ON \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ + -DARROW_FILESYSTEM=ON \ + -DARROW_GANDIVA=ON \ -DARROW_GANDIVA_JAVA=ON \ -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \ - -DARROW_GANDIVA=ON \ + -DARROW_JNI=ON \ -DARROW_ORC=ON \ - -DARROW_PLASMA_JAVA_CLIENT=ON \ + -DARROW_PARQUET=ON \ -DARROW_PLASMA=ON \ + -DARROW_PLASMA_JAVA_CLIENT=ON \ + -DARROW_S3=ON \ + -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=../java-dist \ + -DCMAKE_INSTALL_PREFIX=java-dist \ -DCMAKE_UNITY_BUILD=ON \ - -Dre2_SOURCE=BUNDLED \ - -DBoost_SOURCE=BUNDLED \ - -Dutf8proc_SOURCE=BUNDLED \ - -DSnappy_SOURCE=BUNDLED \ - -DORC_SOURCE=BUNDLED \ - -DZLIB_SOURCE=BUNDLED \ - ../cpp - $ cmake --build . --target install + -Dre2_SOURCE=BUNDLED + $ cmake --build cpp-jni --target install $ ls -latr ../java-dist/lib - |__ libarrow_dataset_jni.dylib |__ libarrow_orc_jni.dylib |__ libgandiva_jni.dylib + |__ libplasma_java.dylib + +Then, you can build JNI libraries: + +.. code-block:: + + $ mkdir -p java-jni + $ cmake \ + -S java \ + -B java-jni \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=java-dist/lib \ + -DCMAKE_PREFIX_PATH=java-dist + $ cmake --build java-jni --target install + $ ls -latr ../java-dist/lib + |__ libarrow_cdata_jni.dylib + |__ libarrow_dataset_jni.dylib + +To build other JNI libraries: + Building Arrow JNI Modules -------------------------- diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 43818e7a9f364..f187cd943d16a 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -28,6 +28,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Components option(ARROW_JAVA_JNI_ENABLE_DEFAULT "Whether enable components by default or not" ON) option(ARROW_JAVA_JNI_ENABLE_C "Enable C data interface" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) +option(ARROW_JAVA_JNI_ENABLE_DATASET "Enable dataset" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) # ccache option(ARROW_JAVA_JNI_USE_CCACHE "Use ccache when compiling (if available)" ON) @@ -54,6 +55,18 @@ include(UseJava) add_library(jni INTERFACE IMPORTED) set_target_properties(jni PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${JNI_INCLUDE_DIRS}") +include(CTest) +if(BUILD_TESTING) + find_package(ArrowTesting REQUIRED) + find_package(GTest REQUIRED) + add_library(arrow_java_test INTERFACE IMPORTED) + target_link_libraries(arrow_java_test INTERFACE ArrowTesting::arrow_testing_static + GTest::gtest_main) +endif() + if(ARROW_JAVA_JNI_ENABLE_C) add_subdirectory(c) endif() +if(ARROW_JAVA_JNI_ENABLE_DATASET) + add_subdirectory(dataset) +endif() diff --git a/java/c/CMakeLists.txt b/java/c/CMakeLists.txt index f3b3117eacf00..7510ab233fe11 100644 --- a/java/c/CMakeLists.txt +++ b/java/c/CMakeLists.txt @@ -18,16 +18,16 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) -add_jar(arrow_cdata_jar +add_jar(arrow_java_jni_cdata_jar src/main/java/org/apache/arrow/c/jni/CDataJniException.java src/main/java/org/apache/arrow/c/jni/JniLoader.java src/main/java/org/apache/arrow/c/jni/JniWrapper.java src/main/java/org/apache/arrow/c/jni/PrivateData.java GENERATE_NATIVE_HEADERS - arrow_cdata_jni_headers) + arrow_java_jni_cdata_headers) -set(ARROW_CDATA_JNI_SOURCES src/main/cpp/jni_wrapper.cc) -add_library(arrow_cdata_jni SHARED ${ARROW_CDATA_JNI_SOURCES}) -target_link_libraries(arrow_cdata_jni arrow_cdata_jni_headers jni) +add_library(arrow_java_jni_cdata SHARED src/main/cpp/jni_wrapper.cc) +set_property(TARGET arrow_java_jni_cdata PROPERTY OUTPUT_NAME "arrow_cdata_jni") +target_link_libraries(arrow_java_jni_cdata arrow_java_jni_cdata_headers jni) -install(TARGETS arrow_cdata_jni DESTINATION ${CMAKE_INSTALL_PREFIX}) +install(TARGETS arrow_java_jni_cdata DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/java/dataset/CMakeLists.txt b/java/dataset/CMakeLists.txt index 5b6e4a9ce241a..3b76b4e03bc6e 100644 --- a/java/dataset/CMakeLists.txt +++ b/java/dataset/CMakeLists.txt @@ -15,28 +15,31 @@ # specific language governing permissions and limitations # under the License. -# -# arrow_dataset_java -# - -# Headers: top level - -project(arrow_dataset_java) +find_package(ArrowDataset REQUIRED) -# Find java/jni -include(FindJava) -include(UseJava) -include(FindJNI) +include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} + ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) -message("generating headers to ${JNI_HEADERS_DIR}") - -add_jar(arrow_dataset_java +add_jar(arrow_java_jni_dataset_jar src/main/java/org/apache/arrow/dataset/jni/JniLoader.java src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java src/main/java/org/apache/arrow/dataset/file/JniWrapper.java src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java GENERATE_NATIVE_HEADERS - arrow_dataset_java-native - DESTINATION - ${JNI_HEADERS_DIR}) + arrow_java_jni_dataset_headers) + +add_library(arrow_java_jni_dataset SHARED src/main/cpp/jni_wrapper.cc + src/main/cpp/jni_util.cc) +set_property(TARGET arrow_java_jni_dataset PROPERTY OUTPUT_NAME "arrow_dataset_jni") +target_link_libraries(arrow_java_jni_dataset arrow_java_jni_dataset_headers jni + ArrowDataset::arrow_dataset_static) + +if(BUILD_TESTING) + add_executable(arrow-java-jni-dataset-test src/main/cpp/jni_util_test.cc + src/main/cpp/jni_util.cc) + target_link_libraries(arrow-java-jni-dataset-test arrow_java_test) + add_test(NAME arrow-java-jni-dataset-test COMMAND arrow-java-jni-dataset-test) +endif() + +install(TARGETS arrow_java_jni_dataset DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/java/dataset/src/main/cpp/CMakeLists.txt b/java/dataset/src/main/cpp/CMakeLists.txt deleted file mode 100644 index 6a0be9b7f5804..0000000000000 --- a/java/dataset/src/main/cpp/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitationsn -# under the License. - -# -# arrow_dataset_jni -# - -project(arrow_dataset_jni) - -cmake_minimum_required(VERSION 3.11) - -find_package(JNI REQUIRED) - -add_custom_target(arrow_dataset_jni) - -set(JNI_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") - -add_subdirectory(../../../../dataset ./java) - -set(ARROW_BUILD_STATIC OFF) - -set(ARROW_DATASET_JNI_LIBS arrow_dataset_static) - -set(ARROW_DATASET_JNI_SOURCES jni_wrapper.cc jni_util.cc) - -add_arrow_lib(arrow_dataset_jni - BUILD_SHARED - SOURCES - ${ARROW_DATASET_JNI_SOURCES} - OUTPUTS - ARROW_DATASET_JNI_LIBRARIES - SHARED_PRIVATE_LINK_LIBS - ${ARROW_DATASET_JNI_LIBS} - STATIC_LINK_LIBS - ${ARROW_DATASET_JNI_LIBS} - EXTRA_INCLUDES - ${JNI_HEADERS_DIR} - PRIVATE_INCLUDES - ${JNI_INCLUDE_DIRS} - DEPENDENCIES - arrow_static - arrow_dataset_java) - -add_dependencies(arrow_dataset_jni ${ARROW_DATASET_JNI_LIBRARIES}) - -add_arrow_test(dataset_jni_test - SOURCES - jni_util_test.cc - jni_util.cc - EXTRA_INCLUDES - ${JNI_INCLUDE_DIRS})