-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-2034: [C++] Filesystem implementation for Azure Blob Storage #12914
Changes from 8 commits
b60c7f4
1e2d0a3
d3cffa2
af13444
1026e15
5f8b82a
b53a834
5bd8210
eead673
f99fad5
e2008d8
bb49f62
323b394
95cc602
9350b4c
9cd1a1a
8ba75ae
ca9a6fc
f067ba9
1f26725
c16f853
14267c2
11ce11f
a428a2b
a62d104
488e223
3831a88
8248c48
dcd6e30
b15a6b1
a06c480
a40a316
8600b6b
18dc625
b532701
200592b
fe5b311
3ea2d7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -432,6 +432,46 @@ else() | |
"${THIRDPARTY_MIRROR_URL}/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz") | ||
endif() | ||
|
||
# Source URLs for the Azure SDK for C++ packages. | ||
# Every stanza below follows the same pattern: when the ARROW_AZURE_*_URL environment | ||
# variable is defined (read at configure time), its value is used verbatim as the source | ||
# URL; otherwise set_urls() is given the azure-sdk-for-cpp GitHub archive URL for the | ||
# package tag built from the matching ARROW_AZURE_*_BUILD_VERSION variable. | ||
# NOTE(review): set_urls() and the *_BUILD_VERSION variables are defined outside this | ||
# excerpt - confirm how set_urls() treats mirrors before relying on these URLs alone. | ||
# azure-core | ||
if(DEFINED ENV{ARROW_AZURE_CORE_URL}) | ||
set(AZURE_CORE_SOURCE_URL "$ENV{ARROW_AZURE_CORE_URL}") | ||
else() | ||
set_urls(AZURE_CORE_SOURCE_URL | ||
"https://github.com/Azure/azure-sdk-for-cpp/archive/azure-core_${ARROW_AZURE_CORE_BUILD_VERSION}.tar.gz" | ||
) | ||
endif() | ||
|
||
# azure-identity | ||
if(DEFINED ENV{ARROW_AZURE_IDENTITY_URL}) | ||
set(AZURE_IDENTITY_SOURCE_URL "$ENV{ARROW_AZURE_IDENTITY_URL}") | ||
else() | ||
set_urls(AZURE_IDENTITY_SOURCE_URL | ||
"https://github.com/Azure/azure-sdk-for-cpp/archive/azure-identity_${ARROW_AZURE_IDENTITY_BUILD_VERSION}.tar.gz" | ||
) | ||
endif() | ||
|
||
# azure-storage-blobs (note the variable names use the singular "BLOB") | ||
if(DEFINED ENV{ARROW_AZURE_STORAGE_BLOB_URL}) | ||
set(AZURE_STORAGE_BLOB_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_BLOB_URL}") | ||
else() | ||
set_urls(AZURE_STORAGE_BLOB_SOURCE_URL | ||
"https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-blobs_${ARROW_AZURE_STORAGE_BLOB_BUILD_VERSION}.tar.gz" | ||
) | ||
endif() | ||
|
||
# azure-storage-common | ||
if(DEFINED ENV{ARROW_AZURE_STORAGE_COMMON_URL}) | ||
set(AZURE_STORAGE_COMMON_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_COMMON_URL}") | ||
else() | ||
set_urls(AZURE_STORAGE_COMMON_SOURCE_URL | ||
"https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-common_${ARROW_AZURE_STORAGE_COMMON_BUILD_VERSION}.tar.gz" | ||
) | ||
endif() | ||
|
||
# azure-storage-files-datalake | ||
if(DEFINED ENV{ARROW_AZURE_STORAGE_FILES_DATALAKE_URL}) | ||
set(AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_FILES_DATALAKE_URL}") | ||
else() | ||
set_urls(AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL | ||
"https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-files-datalake_${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_VERSION}.tar.gz" | ||
) | ||
endif() | ||
|
||
if(DEFINED ENV{ARROW_BOOST_URL}) | ||
set(BOOST_SOURCE_URL "$ENV{ARROW_BOOST_URL}") | ||
else() | ||
|
@@ -4553,6 +4593,105 @@ if(ARROW_S3) | |
endif() | ||
endif() | ||
|
||
# build_azuresdk: declares external projects that build the Azure SDK for C++ packages | ||
# from source and exposes them as imported static targets (Azure::azure-core, | ||
# Azure::azure-identity, Azure::azure-storage-blobs, Azure::azure-storage-common, | ||
# Azure::azure-storage-files-datalake). | ||
# Takes no arguments. Because this is a macro, the variables it sets are visible to the | ||
# caller, notably AZURESDK_PREFIX, AZURESDK_INCLUDE_DIR, AZURESDK_LIBRARIES, | ||
# AZURESDK_LIBRARIES_CPP, AZURESDK_LINK_LIBRARIES and the <LIB>_STATIC_LIBRARY paths. | ||
macro(build_azuresdk) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you build Azure C++ SDK by |
||
message(STATUS "Building Azure C++ SDK from source") | ||
|
||
# All Azure packages share one install prefix inside the current binary directory. | ||
set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install") | ||
set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include") | ||
|
||
# CMake arguments passed to every Azure external project: tests off, libraries installed | ||
# under <prefix>/lib, and the shared prefix used both as install destination and as | ||
# CMAKE_PREFIX_PATH (presumably so a package can locate the ones installed before it). | ||
set(AZURESDK_CMAKE_ARGS | ||
${EP_COMMON_CMAKE_ARGS} | ||
-DBUILD_TESTING=OFF | ||
-DCMAKE_INSTALL_LIBDIR=lib | ||
"-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}" | ||
-DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX}) | ||
|
||
# Create the include directory at configure time; presumably needed because the imported | ||
# targets below reference it in INTERFACE_INCLUDE_DIRECTORIES before anything is built. | ||
file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR}) | ||
|
||
# Azure C++ SDK related libraries to link statically | ||
set(_AZURESDK_LIBS | ||
azure-core | ||
azure-identity | ||
azure-storage-blobs | ||
azure-storage-common | ||
azure-storage-files-datalake) | ||
set(AZURESDK_LIBRARIES) | ||
set(AZURESDK_LIBRARIES_CPP) | ||
# For each library: derive an upper-case, underscore-separated variable prefix (for | ||
# example azure-core -> AZURE_CORE), record "<lib>-cpp" in AZURESDK_LIBRARIES_CPP, | ||
# define the imported static target Azure::<lib> pointing at the static library expected | ||
# under <prefix>/lib, store that path in <PREFIX>_STATIC_LIBRARY, and collect the target | ||
# in AZURESDK_LIBRARIES. | ||
foreach(_AZURESDK_LIB ${_AZURESDK_LIBS}) | ||
string(TOUPPER ${_AZURESDK_LIB} _AZURESDK_LIB_UPPER) | ||
string(REPLACE "-" "_" _AZURESDK_LIB_NAME_PREFIX ${_AZURESDK_LIB_UPPER}) | ||
list(APPEND AZURESDK_LIBRARIES_CPP "${_AZURESDK_LIB}-cpp") | ||
set(_AZURESDK_TARGET_NAME Azure::${_AZURESDK_LIB}) | ||
set(_AZURESDK_STATIC_LIBRARY | ||
"${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${_AZURESDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" | ||
) | ||
add_library(${_AZURESDK_TARGET_NAME} STATIC IMPORTED) | ||
set_target_properties(${_AZURESDK_TARGET_NAME} | ||
PROPERTIES IMPORTED_LOCATION ${_AZURESDK_STATIC_LIBRARY} | ||
INTERFACE_INCLUDE_DIRECTORIES | ||
"${AZURESDK_INCLUDE_DIR}") | ||
set("${_AZURESDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AZURESDK_STATIC_LIBRARY}) | ||
list(APPEND AZURESDK_LIBRARIES ${_AZURESDK_TARGET_NAME}) | ||
endforeach() | ||
|
||
# One external project per package. Each downloads its own archive (verified with a | ||
# SHA256 checksum), is configured with the shared AZURESDK_CMAKE_ARGS, and declares the | ||
# matching static library as a build byproduct. add_dependencies() makes the imported | ||
# target wait for its external project. | ||
# NOTE(review): no DEPENDS is declared between these external projects - confirm whether | ||
# the identity/storage packages need azure_core_ep to be installed first. | ||
externalproject_add(azure_core_ep | ||
${EP_LOG_OPTIONS} | ||
URL ${AZURE_CORE_SOURCE_URL} | ||
URL_HASH "SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}" | ||
CMAKE_ARGS ${AZURESDK_CMAKE_ARGS} | ||
BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY}) | ||
add_dependencies(Azure::azure-core azure_core_ep) | ||
|
||
externalproject_add(azure_identity_ep | ||
${EP_LOG_OPTIONS} | ||
URL ${AZURE_IDENTITY_SOURCE_URL} | ||
URL_HASH "SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}" | ||
CMAKE_ARGS ${AZURESDK_CMAKE_ARGS} | ||
BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY}) | ||
add_dependencies(Azure::azure-identity azure_identity_ep) | ||
|
||
# The URL/checksum variables use the singular "BLOB" while the byproduct variable, | ||
# generated from the library name in the loop above, uses the plural "BLOBS". | ||
externalproject_add(azure_storage_blobs_ep | ||
${EP_LOG_OPTIONS} | ||
URL ${AZURE_STORAGE_BLOB_SOURCE_URL} | ||
URL_HASH "SHA256=${ARROW_AZURE_STORAGE_BLOB_BUILD_SHA256_CHECKSUM}" | ||
CMAKE_ARGS ${AZURESDK_CMAKE_ARGS} | ||
BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}) | ||
add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep) | ||
|
||
externalproject_add(azure_storage_common_ep | ||
${EP_LOG_OPTIONS} | ||
URL ${AZURE_STORAGE_COMMON_SOURCE_URL} | ||
URL_HASH "SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}" | ||
CMAKE_ARGS ${AZURESDK_CMAKE_ARGS} | ||
BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY}) | ||
add_dependencies(Azure::azure-storage-common azure_storage_common_ep) | ||
|
||
externalproject_add(azure_storage_files_datalake_ep | ||
${EP_LOG_OPTIONS} | ||
URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL} | ||
URL_HASH "SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}" | ||
CMAKE_ARGS ${AZURESDK_CMAKE_ARGS} | ||
BUILD_BYPRODUCTS ${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}) | ||
add_dependencies(Azure::azure-storage-files-datalake azure_storage_files_datalake_ep) | ||
|
||
# Consumers of Azure::azure-core also link libcurl and libxml2. | ||
# NOTE(review): CURL::libcurl and LibXml2::LibXml2 are not created in this macro - | ||
# confirm they are resolved elsewhere before this point. | ||
set_property(TARGET Azure::azure-core | ||
APPEND | ||
PROPERTY INTERFACE_LINK_LIBRARIES CURL::libcurl LibXml2::LibXml2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to call |
||
|
||
# Result for the caller: the list of imported Azure::* targets to link against. | ||
set(AZURESDK_LINK_LIBRARIES ${AZURESDK_LIBRARIES}) | ||
endmacro() | ||
|
||
# When ARROW_AZURE is enabled, set up the Azure SDK via build_azuresdk() and report the | ||
# resulting header directory and link targets. | ||
if(ARROW_AZURE) | ||
# TODO - use resolve_dependency | ||
build_azuresdk() | ||
# Looks up each "<lib>-cpp" package collected by build_azuresdk() in config mode. | ||
# NOTE(review): this runs at configure time, while the external projects declared by | ||
# build_azuresdk() are presumably only built at build time - confirm these REQUIRED | ||
# lookups can succeed on a clean tree (the Azure::* imported targets already exist). | ||
foreach(AZURESDK_LIBRARY_CPP ${AZURESDK_LIBRARIES_CPP}) | ||
find_package(${AZURESDK_LIBRARY_CPP} CONFIG REQUIRED) | ||
endforeach() | ||
This is needless. |
||
# Adds the SDK headers as a SYSTEM include directory for this directory scope. | ||
# NOTE(review): the imported Azure::* targets already carry this path as | ||
# INTERFACE_INCLUDE_DIRECTORIES - confirm whether the directory-wide include is needed. | ||
include_directories(SYSTEM ${AZURESDK_INCLUDE_DIR}) | ||
This is needless. |
||
message(STATUS "Found Azure SDK headers: ${AZURESDK_INCLUDE_DIR}") | ||
message(STATUS "Found Azure SDK libraries: ${AZURESDK_LINK_LIBRARIES}") | ||
endif() | ||
|
||
# ---------------------------------------------------------------------- | ||
# ucx - communication framework for modern, high-bandwidth and low-latency networks | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,34 @@ if(ARROW_GCS) | |
Boost::system) | ||
endif() | ||
|
||
if(ARROW_AZURE) | ||
# Sources of the Azure filesystem library (AZURE_SRCS is reset to empty first). | ||
set(AZURE_SRCS) | ||
list(APPEND | ||
AZURE_SRCS | ||
azurefs_mock.cc | ||
azurefs.cc) | ||
|
||
# Build those sources as a separate library named "azurefs", reusing the link-library | ||
# lists of the main arrow library for the shared and static variants. | ||
# NOTE(review): add_arrow_lib() is defined outside this excerpt - confirm which targets | ||
# it creates. | ||
add_arrow_lib(azurefs | ||
SOURCES | ||
${AZURE_SRCS} | ||
SHARED_LINK_LIBS | ||
${ARROW_LINK_LIBS} | ||
SHARED_PRIVATE_LINK_LIBS | ||
${ARROW_SHARED_PRIVATE_LINK_LIBS} | ||
STATIC_LINK_LIBS | ||
${ARROW_STATIC_LINK_LIBS}) | ||
|
||
# Compile the azurefs_objlib target as C++14 and make that standard mandatory. | ||
# NOTE(review): azurefs_objlib is presumably created by add_arrow_lib(azurefs ...) above. | ||
set_target_properties(azurefs_objlib PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, sorry. I forgot that we embed filesystem modules to How about changing the default C++ version? diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index d3a2a1a2d2..cdaafe379b 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -118,12 +118,16 @@ if(NOT DEFINED CMAKE_C_STANDARD)
set(CMAKE_C_STANDARD 11)
endif()
-# This ensures that things like c++11 get passed correctly
+# This ensures that things like c++11/c++14 get passed correctly
if(NOT DEFINED CMAKE_CXX_STANDARD)
- set(CMAKE_CXX_STANDARD 11)
+ if(ARROW_AZURE)
+ set(CMAKE_CXX_STANDARD 14)
+ else()
+ set(CMAKE_CXX_STANDARD 11)
+ endif()
endif()
-# We require a C++11 compliant compiler
+# We require a C++11/14 compliant compiler
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# ARROW-6848: Do not use GNU (or other CXX) extensions
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index ec6cada1cd..1ded8e59d4 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -469,6 +469,12 @@ if(ARROW_FILESYSTEM)
filesystem/path_util.cc
filesystem/util_internal.cc)
+ if(ARROW_AZURE)
+ list(APPEND ARROW_SRCS filesystem/azurefs.cc filesystem/azurefs_mock.cc)
+ set_source_files_properties(filesystem/azurefs.cc filesystem/azurefs_mock.cc
+ PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+ endif()
if(ARROW_GCS)
list(APPEND ARROW_SRCS filesystem/gcsfs.cc filesystem/gcsfs_internal.cc)
set_source_files_properties(filesystem/gcsfs.cc filesystem/gcsfs_internal.cc
diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt
index 819eca08cf..bbca231baf 100644
--- a/cpp/src/arrow/filesystem/CMakeLists.txt
+++ b/cpp/src/arrow/filesystem/CMakeLists.txt
@@ -28,8 +28,8 @@ add_arrow_test(filesystem-test
EXTRA_LABELS
filesystem)
-if(ARROW_GCS)
- add_arrow_test(gcsfs_test
+if(ARROW_AZURE)
+ add_arrow_test(azurefs_test
EXTRA_LABELS
filesystem
EXTRA_LINK_LIBS
@@ -37,32 +37,13 @@ if(ARROW_GCS)
Boost::system)
endif()
-if(ARROW_AZURE)
- set(AZURE_SRCS)
- list(APPEND
- AZURE_SRCS
- azurefs_mock.cc
- azurefs.cc)
-
- add_arrow_lib(azurefs
- SOURCES
- ${AZURE_SRCS}
- SHARED_LINK_LIBS
- ${ARROW_LINK_LIBS}
- SHARED_PRIVATE_LINK_LIBS
- ${ARROW_SHARED_PRIVATE_LINK_LIBS}
- STATIC_LINK_LIBS
- ${ARROW_STATIC_LINK_LIBS})
-
- set_target_properties(azurefs_objlib PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON)
-
- set(TEST_LIBS_AZURE ${ARROW_TEST_LINK_LIBS})
- list(APPEND TEST_LIBS_AZURE azurefs_shared)
- add_arrow_test(azurefs_test EXTRA_LABELS filesystem
- STATIC_LINK_LIBS
- ${TEST_LIBS_AZURE}
- )
- set_target_properties(arrow-azurefs-test PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON)
+if(ARROW_GCS)
+ add_arrow_test(gcsfs_test
+ EXTRA_LABELS
+ filesystem
+ EXTRA_LINK_LIBS
+ Boost::filesystem
+ Boost::system)
endif()
if(ARROW_S3) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we change the default C++ version, then it will pick C++14 for the entire compilation, and looks like from this conversation - https://issues.apache.org/jira/browse/ARROW-2034?focusedCommentId=17463318&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17463318, it is not desired, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that @pitrou said that we can use C++14 features in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right. Also, |
||
|
||
# Link libraries for the Azure filesystem test: the common test libraries plus | ||
# azurefs_shared. | ||
# NOTE(review): azurefs_shared is passed through STATIC_LINK_LIBS - confirm this matches | ||
# how add_arrow_test() (defined outside this excerpt) interprets that keyword. | ||
set(TEST_LIBS_AZURE ${ARROW_TEST_LINK_LIBS}) | ||
list(APPEND TEST_LIBS_AZURE azurefs_shared) | ||
add_arrow_test(azurefs_test EXTRA_LABELS filesystem | ||
STATIC_LINK_LIBS | ||
${TEST_LIBS_AZURE} | ||
) | ||
# Compile the arrow-azurefs-test target as C++14 and make that standard mandatory. | ||
# NOTE(review): arrow-azurefs-test is presumably the target add_arrow_test() creates. | ||
set_target_properties(arrow-azurefs-test PROPERTIES CXX_STANDARD 14 CXX_STANDARD_REQUIRED ON) | ||
endif() | ||
|
||
if(ARROW_S3) | ||
add_arrow_test(s3fs_test | ||
SOURCES | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you keep this list in alphabetical order?