diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index b420ec063955a..3d6b89f2f51a1 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -217,6 +217,7 @@ jobs: ARROW_HOME: /usr ARROW_JEMALLOC: OFF ARROW_MIMALLOC: ON + ARROW_ORC: ON ARROW_PARQUET: ON ARROW_USE_GLOG: OFF ARROW_VERBOSE_THIRDPARTY_BUILD: OFF @@ -286,7 +287,10 @@ jobs: bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" - name: Test shell: bash - run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build + run: | + # For ORC + export TZDIR=/c/msys64/usr/share/zoneinfo + ci/scripts/cpp_test.sh $(pwd) $(pwd)/build windows-mingw: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++ diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index cbff03d1bbba6..5ddb8e370abe4 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -72,6 +72,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ -DARROW_HDFS=ON ^ -DARROW_JSON=ON ^ -DARROW_MIMALLOC=ON ^ + -DARROW_ORC=ON ^ -DARROW_PARQUET=ON ^ -DARROW_S3=%ARROW_S3% ^ -DARROW_SUBSTRAIT=ON ^ @@ -93,13 +94,11 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ .. || exit /B cmake --build . --target install --config Release || exit /B -@rem Needed so arrow-python-test.exe works -set OLD_PYTHONHOME=%PYTHONHOME% -set PYTHONHOME=%CONDA_PREFIX% +@rem For ORC C++ +set TZDIR=%CONDA_PREFIX%\share\zoneinfo ctest --output-on-failure || exit /B -set PYTHONHOME=%OLD_PYTHONHOME% popd @rem diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index dd313f19d708f..98e9364fd1b58 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -22,21 +22,19 @@ brotli bzip2 c-ares cmake +flatbuffers gflags glog gmock>=1.10.0 google-cloud-cpp>=1.34.0 -# 1.45.0 appears to segfault on Windows/AppVeyor -grpc-cpp>=1.27.3,<1.45.0 +grpc-cpp gtest>=1.10.0 libprotobuf libutf8proc lz4-c make ninja -# Required by google-cloud-cpp, the Conda package is missing the dependency: -# https://github.com/conda-forge/google-cloud-cpp-feedstock/issues/28 -nlohmann_json +orc pkg-config python rapidjson @@ -46,4 +44,3 @@ thrift-cpp>=0.11.0 xsimd zlib zstd -flatbuffers diff --git a/ci/scripts/java_jni_windows_build.sh b/ci/scripts/java_jni_windows_build.sh index 0df60b7777e0a..8d142bd4254ee 100755 --- a/ci/scripts/java_jni_windows_build.sh +++ b/ci/scripts/java_jni_windows_build.sh @@ -33,8 +33,7 @@ install_dir=${build_dir}/cpp-install : ${ARROW_BUILD_TESTS:=ON} : ${ARROW_DATASET:=ON} export ARROW_DATASET -# We can enable this after ARROW-17817 is resolved. -: ${ARROW_ORC:=OFF} +: ${ARROW_ORC:=ON} export ARROW_ORC : ${ARROW_PARQUET:=ON} : ${ARROW_S3:=ON} diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index b56918e602c64..0a0f24b47f35a 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -122,8 +122,6 @@ endmacro() macro(resolve_option_dependencies) if(MSVC_TOOLCHAIN) - # ARROW-17817: ORC can't be built on Windows. - set(ARROW_ORC OFF) # Plasma using glog is not fully tested on windows. set(ARROW_USE_GLOG OFF) endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index e9e71328afc70..1c9670208aca1 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4286,6 +4286,9 @@ macro(build_orc) get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) + get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + # Weirdly passing in PROTOBUF_LIBRARY for PROTOC_LIBRARY still results in ORC finding # the protoc library. set(ORC_CMAKE_ARGS @@ -4305,8 +4308,8 @@ macro(build_orc) "-DPROTOBUF_INCLUDE_DIR=${ORC_PROTOBUF_INCLUDE_DIR}" "-DPROTOBUF_LIBRARY=${ORC_PROTOBUF_LIBRARY}" "-DPROTOC_LIBRARY=${ORC_PROTOBUF_LIBRARY}" - "-DLZ4_HOME=${LZ4_HOME}" - "-DZSTD_HOME=${ZSTD_HOME}") + "-DLZ4_HOME=${ORC_LZ4_ROOT}" + "-DZSTD_HOME=${ORZ_ZSTD_ROOT}") if(ORC_PROTOBUF_EXECUTABLE) set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DPROTOBUF_EXECUTABLE:FILEPATH=${ORC_PROTOBUF_EXECUTABLE}") @@ -4322,20 +4325,27 @@ macro(build_orc) URL ${ORC_SOURCE_URL} URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" BUILD_BYPRODUCTS ${ORC_STATIC_LIB} - CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}) - - add_dependencies(toolchain orc_ep) + CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS} + DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} + ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET} + LZ4::lz4 + ZLIB::ZLIB) set(ORC_VENDORED 1) - add_dependencies(orc_ep ZLIB::ZLIB) - add_dependencies(orc_ep LZ4::lz4) - add_dependencies(orc_ep ${Snappy_TARGET}) - add_dependencies(orc_ep ${ARROW_PROTOBUF_LIBPROTOBUF}) add_library(orc::liborc STATIC IMPORTED) set_target_properties(orc::liborc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}") + target_link_libraries(orc::liborc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET}) + if(NOT MSVC) + if(NOT APPLE) + target_link_libraries(orc::liborc INTERFACE Threads::Threads) + endif() + target_link_libraries(orc::liborc INTERFACE ${CMAKE_DL_LIBS}) + endif() add_dependencies(toolchain orc_ep) add_dependencies(orc::liborc orc_ep) diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt index 6b2536bb55503..3c695abb5a02b 100644 --- a/cpp/src/arrow/adapters/orc/CMakeLists.txt +++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt @@ -26,27 +26,14 @@ install(FILES adapter.h options.h # pkg-config support arrow_add_pkg_config("arrow-orc") -set(ORC_MIN_TEST_LIBS - GTest::gtest_main - GTest::gtest - ${Snappy_TARGET} - LZ4::lz4 - ZLIB::ZLIB) - if(ARROW_BUILD_STATIC) set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_static arrow_static) else() set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared) endif() -if(APPLE) - set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} ${CMAKE_DL_LIBS}) -elseif(NOT MSVC) - set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} pthread ${CMAKE_DL_LIBS}) -endif() - set(ORC_STATIC_TEST_LINK_LIBS orc::liborc ${ARROW_LIBRARIES_FOR_STATIC_TESTS} - ${ORC_MIN_TEST_LIBS}) + GTest::gtest_main GTest::gtest) add_arrow_test(adapter_test PREFIX diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index 1efc02bc404db..5c234cc97c4a0 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -42,22 +42,22 @@ namespace arrow { using internal::checked_pointer_cast; -constexpr int kDefaultSmallMemStreamSize = 16384 * 5; // 80KB -constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024; +constexpr size_t kDefaultSmallMemStreamSize = 16384 * 5; // 80KB +constexpr size_t kDefaultMemStreamSize = 10 * 1024 * 1024; constexpr int64_t kNanoMax = std::numeric_limits::max(); constexpr int64_t kNanoMin = std::numeric_limits::lowest(); -const int64_t kMicroMax = std::floor(kNanoMax / 1000); -const int64_t kMicroMin = std::ceil(kNanoMin / 1000); -const int64_t kMilliMax = std::floor(kMicroMax / 1000); -const int64_t kMilliMin = std::ceil(kMicroMin / 1000); -const int64_t kSecondMax = std::floor(kMilliMax / 1000); -const int64_t kSecondMin = std::ceil(kMilliMin / 1000); +const int64_t kMicroMax = static_cast(std::floor(kNanoMax / 1000)); +const int64_t kMicroMin = static_cast(std::ceil(kNanoMin / 1000)); +const int64_t kMilliMax = static_cast(std::floor(kMicroMax / 1000)); +const int64_t kMilliMin = static_cast(std::ceil(kMicroMin / 1000)); +const int64_t kSecondMax = static_cast(std::floor(kMilliMax / 1000)); +const int64_t kSecondMin = static_cast(std::ceil(kMilliMin / 1000)); static constexpr random::SeedType kRandomSeed = 0x0ff1ce; class MemoryOutputStream : public liborc::OutputStream { public: - explicit MemoryOutputStream(ssize_t capacity) + explicit MemoryOutputStream(size_t capacity) : data_(capacity), name_("MemoryOutputStream"), length_(0) {} uint64_t getLength() const override { return length_; } @@ -86,12 +86,13 @@ class MemoryOutputStream : public liborc::OutputStream { std::shared_ptr GenerateFixedDifferenceBuffer(int32_t fixed_length, int64_t length) { BufferBuilder builder; - int32_t offsets[length]; + std::vector offsets; + offsets.resize(length); ARROW_EXPECT_OK(builder.Resize(4 * length)); - for (int32_t i = 0; i < length; i++) { - offsets[i] = fixed_length * i; + for (int64_t i = 0; i < length; i++) { + offsets[i] = static_cast(fixed_length * i); } - ARROW_EXPECT_OK(builder.Append(offsets, 4 * length)); + ARROW_EXPECT_OK(builder.Append(offsets.data(), 4 * length)); std::shared_ptr buffer; ARROW_EXPECT_OK(builder.Finish(&buffer)); return buffer; diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 6f7fdc82d5f0f..763f5df5cd775 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -101,6 +101,8 @@ jobs: shell: cmd run: | call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + REM For ORC + set TZDIR=/c/msys64/usr/share/zoneinfo bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist" - name: Compress into single artifact to keep directory structure shell: bash @@ -148,8 +150,7 @@ jobs: test -f arrow/java-dist/arrow_dataset_jni.dll test -f arrow/java-dist/libarrow_orc_jni.dylib test -f arrow/java-dist/libarrow_orc_jni.so - # We can enable this after ARROW-17817 is resolved. - # test -f arrow/java-dist/arrow_orc_jni.dll + test -f arrow/java-dist/arrow_orc_jni.dll test -f arrow/java-dist/libgandiva_jni.dylib test -f arrow/java-dist/libgandiva_jni.so test -f arrow/java-dist/libplasma_java.dylib