Skip to content

Commit

Permalink
[arrow] Update Arrow to 9.0.0 (#26187)
Browse files: browse the repository at this point in the history
* update patches

* fix parquet

* format

* add xsimd dep

* update version

* cuda fix

* update version
  • Loading branch information
assignUser authored Aug 16, 2022
1 parent a9a2317 commit cd5e794
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 174 deletions.
61 changes: 61 additions & 0 deletions ports/arrow/cuda-ptr.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
diff --git a/cpp/src/arrow/gpu/cuda_context.h b/cpp/src/arrow/gpu/cuda_context.h
index 00bcf94c8..0115ed19a 100644
--- a/cpp/src/arrow/gpu/cuda_context.h
+++ b/cpp/src/arrow/gpu/cuda_context.h
@@ -76,7 +76,7 @@ class ARROW_EXPORT CudaDeviceManager {
static std::unique_ptr<CudaDeviceManager> instance_;

class Impl;
- std::unique_ptr<Impl> impl_;
+ std::shared_ptr<Impl> impl_;

friend class CudaContext;
friend class CudaDevice;
@@ -146,7 +146,7 @@ class ARROW_EXPORT CudaDevice : public Device {
/// \endcond

explicit CudaDevice(Impl);
- std::unique_ptr<Impl> impl_;
+ std::shared_ptr<Impl> impl_;
};

/// \brief Return whether a device instance is a CudaDevice
@@ -297,7 +297,7 @@ class ARROW_EXPORT CudaContext : public std::enable_shared_from_this<CudaContext
uintptr_t dst, uintptr_t src, int64_t nbytes);

class Impl;
- std::unique_ptr<Impl> impl_;
+ std::shared_ptr<Impl> impl_;

friend class CudaBuffer;
friend class CudaBufferReader;
diff --git a/cpp/src/arrow/gpu/cuda_internal.h b/cpp/src/arrow/gpu/cuda_internal.h
index 25eb6e06c..d70873634 100644
--- a/cpp/src/arrow/gpu/cuda_internal.h
+++ b/cpp/src/arrow/gpu/cuda_internal.h
@@ -33,6 +33,7 @@ namespace internal {

std::string CudaErrorDescription(CUresult err);

+ARROW_EXPORT
Status StatusFromCuda(CUresult res, const char* function_name = nullptr);

#define CU_RETURN_NOT_OK(FUNC_NAME, STMT) \
diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h
index 4efd38894..18c23a507 100644
--- a/cpp/src/arrow/gpu/cuda_memory.h
+++ b/cpp/src/arrow/gpu/cuda_memory.h
@@ -250,10 +250,12 @@ Result<std::shared_ptr<CudaHostBuffer>> AllocateCudaHostBuffer(int device_number
const int64_t size);

/// Low-level: get a device address through which the CPU data be accessed.
+ARROW_EXPORT
Result<uintptr_t> GetDeviceAddress(const uint8_t* cpu_data,
const std::shared_ptr<CudaContext>& ctx);

/// Low-level: get a CPU address through which the device data be accessed.
+ARROW_EXPORT
Result<uint8_t*> GetHostAddress(uintptr_t device_ptr);

} // namespace cuda

147 changes: 25 additions & 122 deletions ports/arrow/fix-ThirdPartyToolchain.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7d9c5c7..33afd70 100644
index bca86d4c1..52f87aa97 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -705,7 +705,7 @@ endif()
@@ -737,7 +737,7 @@ endif()

if(ARROW_WITH_BROTLI)
# Order is important for static linking
Expand All @@ -11,19 +11,7 @@ index 7d9c5c7..33afd70 100644
list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS})
list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS})
if(Brotli_SOURCE STREQUAL "SYSTEM")
@@ -721,9 +721,9 @@ if(ARROW_WITH_BZ2)
endif()

if(ARROW_WITH_LZ4)
- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4)
+ list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4)
if(Lz4_SOURCE STREQUAL "SYSTEM")
- list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4)
+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS lz4::lz4)
endif()
endif()

@@ -793,10 +793,10 @@ if(ARROW_WITH_OPENTELEMETRY)
@@ -825,10 +825,10 @@ if(ARROW_WITH_OPENTELEMETRY)
endif()

if(ARROW_WITH_UTF8PROC)
Expand All @@ -38,7 +26,7 @@ index 7d9c5c7..33afd70 100644
endif()

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index f070323..290b5a7 100644
index 5d1da18b7..9c5adf47d 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -53,7 +53,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
Expand All @@ -50,35 +38,24 @@ index f070323..290b5a7 100644
BZip2
c-ares
gflags
@@ -62,7 +62,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
gRPC
GTest
LLVM
- Lz4
+ lz4
nlohmann_json
opentelemetry-cpp
ORC
@@ -72,7 +72,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
Snappy
@@ -74,7 +74,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
Substrait
Thrift
ucx
- utf8proc
+ unofficial-utf8proc
xsimd
ZLIB
zstd)
@@ -139,6 +139,9 @@ foreach(DEPENDENCY ${ARROW_THIRDPARTY_DEPENDENCIES})
@@ -152,6 +152,7 @@ foreach(DEPENDENCY ${ARROW_THIRDPARTY_DEPENDENCIES})
endforeach()

macro(build_dependency DEPENDENCY_NAME)
+ if(NOT "${DEPENDENCY_NAME}" STREQUAL "xsimd")
+ message(FATAL_ERROR "dependencies should never be built in vcpkg")
+ endif()
+ message(FATAL_ERROR "dependencies should never be built in vcpkg")
if("${DEPENDENCY_NAME}" STREQUAL "AWSSDK")
build_awssdk()
elseif("${DEPENDENCY_NAME}" STREQUAL "benchmark")
@@ -222,7 +225,7 @@ macro(resolve_dependency DEPENDENCY_NAME)
@@ -239,7 +240,7 @@ macro(resolve_dependency DEPENDENCY_NAME)
endif()

if(ARG_HAVE_ALT)
Expand All @@ -87,9 +64,11 @@ index f070323..290b5a7 100644
else()
set(PACKAGE_NAME ${DEPENDENCY_NAME})
endif()
@@ -1061,7 +1064,21 @@ endmacro()
if(ARROW_WITH_SNAPPY)
resolve_dependency(Snappy PC_PACKAGE_NAMES snappy)
@@ -1138,8 +1139,23 @@ if(ARROW_WITH_SNAPPY)
TRUE
PC_PACKAGE_NAMES
snappy)
+ set(Snappy_TARGET "Snappy::snappy")
if(${Snappy_SOURCE} STREQUAL "SYSTEM" AND NOT snappy_PC_FOUND)
+ get_target_property(SNAPPY_IMPLIB_DEBUG Snappy::snappy IMPORTED_IMPLIB_DEBUG)
+ get_target_property(SNAPPY_LIB_DEBUG Snappy::snappy IMPORTED_LOCATION_DEBUG)
Expand All @@ -104,115 +83,39 @@ index f070323..290b5a7 100644
+ elseif(CMAKE_BUILD_TYPE STREQUAL "RELEASE" AND SNAPPY_LIB_RELEASE)
+ set(SNAPPY_LIB "${SNAPPY_LIB_RELEASE}")
+ else()
get_target_property(SNAPPY_LIB Snappy::snappy IMPORTED_LOCATION)
get_target_property(SNAPPY_TYPE ${Snappy_TARGET} TYPE)
+ endif()
string(APPEND ARROW_PC_LIBS_PRIVATE " ${SNAPPY_LIB}")
endif()
# TODO: Don't use global includes but rather target_include_directories
@@ -1129,10 +1132,14 @@ macro(build_brotli)
if(NOT SNAPPY_TYPE STREQUAL "INTERFACE_LIBRARY")
get_target_property(SNAPPY_LIB ${Snappy_TARGET}
IMPORTED_LOCATION_${UPPERCASE_BUILD_TYPE})
@@ -1214,7 +1230,8 @@ macro(build_brotli)
endmacro()

if(ARROW_WITH_BROTLI)
- resolve_dependency(Brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc)
+ set(Brotli_SOURCE SYSTEM)
+ resolve_dependency(unofficial-brotli USE_CONFIG TRUE PC_PACKAGE_NAMES libbrotlidec libbrotlienc)
# TODO: Don't use global includes but rather target_include_directories
- get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon
+ get_target_property(BROTLI_INCLUDE_DIR unofficial::brotli::brotlicommon
INTERFACE_INCLUDE_DIRECTORIES)
+ if(NOT BROTLI_INCLUDE_DIR)
+ get_target_property(BROTLI_INCLUDE_DIR unofficial::brotli::brotlicommon-static INTERFACE_INCLUDE_DIRECTORIES)
+ endif()
include_directories(SYSTEM ${BROTLI_INCLUDE_DIR})
endif()

@@ -1434,6 +1446,13 @@ if(ARROW_WITH_THRIFT)
# TODO: Don't use global includes but rather target_include_directories
include_directories(SYSTEM ${THRIFT_INCLUDE_DIR})

+ if(THRIFT_INCLUDE_DIR)
+ file(READ "${THRIFT_INCLUDE_DIR}/config.h" THRIFT_CONFIG_H_CONTENT)
+ string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" THRIFT_VERSION_DEFINITION "${THRIFT_CONFIG_H_CONTENT}")
+ string(REGEX MATCH "[0-9.]+" THRIFT_VERSION "${THRIFT_VERSION_DEFINITION}")
+ set(THRIFT_VERSION "${THRIFT_VERSION}")
+ endif()
+
string(REPLACE "." ";" VERSION_LIST ${THRIFT_VERSION})
list(GET VERSION_LIST 0 THRIFT_VERSION_MAJOR)
list(GET VERSION_LIST 1 THRIFT_VERSION_MINOR)
@@ -1561,6 +1580,8 @@ if(ARROW_WITH_PROTOBUF)
if(PARQUET_REQUIRE_ENCRYPTION AND NOT ARROW_PARQUET)
@@ -1646,7 +1663,8 @@ if(ARROW_WITH_PROTOBUF)
${ARROW_PROTOBUF_REQUIRED_VERSION}
PC_PACKAGE_NAMES
protobuf)
-
+ get_target_property(PROTOBUF_INCLUDE_DIR protobuf::libprotobuf
+ INTERFACE_INCLUDE_DIRECTORIES)

if(NOT Protobuf_USE_STATIC_LIBS AND MSVC_TOOLCHAIN)
add_definitions(-DPROTOBUF_USE_DLLS)
@@ -2260,10 +2276,11 @@ macro(build_lz4)
endmacro()

if(ARROW_WITH_LZ4)
- resolve_dependency(Lz4 PC_PACKAGE_NAMES liblz4)
+ set(Lz4_SOURCE SYSTEM)
+ resolve_dependency(lz4 USE_CONFIG TRUE PC_PACKAGE_NAMES liblz4)

# TODO: Don't use global includes but rather target_include_directories
- get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
+ get_target_property(LZ4_INCLUDE_DIR lz4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
endif()

@@ -2517,9 +2534,8 @@ macro(build_utf8proc)
endif()
@@ -2599,9 +2617,7 @@ macro(build_utf8proc)
endmacro()

if(ARROW_WITH_UTF8PROC)
- resolve_dependency(utf8proc
- REQUIRED_VERSION
- "2.2.0"
+ set(utf8proc_SOURCE SYSTEM)
+ resolve_dependency(unofficial-utf8proc
PC_PACKAGE_NAMES
libutf8proc)

@@ -2527,7 +2543,7 @@ if(ARROW_WITH_UTF8PROC)

# TODO: Don't use global definitions but rather
# target_compile_definitions or target_link_libraries
- get_target_property(UTF8PROC_COMPILER_DEFINITIONS utf8proc::utf8proc
+ get_target_property(UTF8PROC_COMPILER_DEFINITIONS utf8proc
INTERFACE_COMPILER_DEFINITIONS)
if(UTF8PROC_COMPILER_DEFINITIONS)
add_definitions(-D${UTF8PROC_COMPILER_DEFINITIONS})
@@ -2535,7 +2551,7 @@ if(ARROW_WITH_UTF8PROC)

# TODO: Don't use global includes but rather
# target_include_directories or target_link_libraries
- get_target_property(UTF8PROC_INCLUDE_DIR utf8proc::utf8proc
+ get_target_property(UTF8PROC_INCLUDE_DIR utf8proc
INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${UTF8PROC_INCLUDE_DIR})
endif()
@@ -3719,6 +3735,9 @@ if(ARROW_WITH_GRPC)
# TODO: Don't use global includes but rather target_include_directories
get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${GRPC_INCLUDE_DIR})
+ if (ABSL_USE_CXX17)
+ set(CMAKE_CXX_STANDARD 17)
+ endif()

if(GRPC_VENDORED)
set(GRPCPP_PP_INCLUDE TRUE)
diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt
index b1b6847..444a45e 100644
--- a/cpp/src/arrow/adapters/orc/CMakeLists.txt
+++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt
@@ -30,7 +30,7 @@ set(ORC_MIN_TEST_LIBS
GTest::gtest_main
GTest::gtest
Snappy::snappy
- LZ4::lz4
+ lz4::lz4
ZLIB::ZLIB)

if(ARROW_BUILD_STATIC)
add_definitions(-DARROW_WITH_UTF8PROC)
8 changes: 4 additions & 4 deletions ports/arrow/portfile.cmake
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
vcpkg_from_github(
OUT_SOURCE_PATH SOURCE_PATH
REPO apache/arrow
REF apache-arrow-8.0.1
SHA512 b32f5a3666de7d6d16ea828697bb42e1b6605f58719c42e670c9ec0a8782057dac933f6e14e97b46f82802fc38cc7f4cc825794a4a95ac641593c2ee26ac5bbe
REF apache-arrow-9.0.0
SHA512 1191793dd56471fb2b402afbe9b31cde4c5126785243e538e42ba95ccd31d523121f07b144461c99a4b7449e611aa5998bd0de95e8e4b0e3c80397499fe746f0
HEAD_REF master
PATCHES
vs-2022-fixes.patch
cuda-ptr.patch
msvc-static-name.patch
fix-ThirdPartyToolchain.patch
static-link-libs.patch # https://github.com/apache/arrow/pull/13707
static-link-libs.patch # https://github.com/apache/arrow/pull/13707 & pull/13863
)
file(REMOVE "${SOURCE_PATH}/cpp/cmake_modules/Findzstd.cmake"
"${SOURCE_PATH}/cpp/cmake_modules/FindBrotli.cmake"
Expand Down
Loading

0 comments on commit cd5e794

Please sign in to comment.