From 99cbeffacf53673cb5750d2dd615b72834bcecf9 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 28 Jul 2021 09:25:36 +0200 Subject: [PATCH 01/36] MINIFICPP-1616 Add Azure DataLake storage lib to minifi --- .github/workflows/ci.yml | 2 +- cmake/BundledAzureSdkCpp.cmake | 52 ++++++------------- extensions/azure/CMakeLists.txt | 2 +- extensions/azure/storage/AzureBlobStorage.cpp | 9 ++-- .../azure-sdk-for-cpp-old-compiler.patch | 42 --------------- ...fix-illegal-qualified-name-in-member.patch | 14 ----- .../azure-sdk-cpp-openssl-include-fix.patch | 14 +++++ 7 files changed, 35 insertions(+), 100 deletions(-) delete mode 100644 thirdparty/azure-sdk-cpp-for-cpp/azure-sdk-for-cpp-old-compiler.patch delete mode 100644 thirdparty/azure-sdk-cpp-for-cpp/fix-illegal-qualified-name-in-member.patch create mode 100644 thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c1a9769d23..efbf11ecb8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: export LDFLAGS="-L/usr/local/opt/flex/lib" export CPPFLAGS="-I/usr/local/opt/flex/include" # CPPFLAGS are not recognized by cmake, so we have to force them to CFLAGS and CXXFLAGS to have flex 2.6 working - ./bootstrap.sh -e -t && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="${CPPFLAGS} ${CFLAGS}" -DCMAKE_CXX_FLAGS="${CPPFLAGS} ${CXXFLAGS}" -DENABLE_LUA_SCRIPTING=ON -DENABLE_SQL=ON -DUSE_REAL_ODBC_TEST_DRIVER=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_RULE_MESSAGES=OFF -DSTRICT_GSL_CHECKS=AUDIT -DFAIL_ON_WARNINGS=ON .. && cmake --build . --parallel 4 + ./bootstrap.sh -e -t && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="${CPPFLAGS} ${CFLAGS}" -DCMAKE_CXX_FLAGS="${CPPFLAGS} ${CXXFLAGS}" -DENABLE_LUA_SCRIPTING=ON -DENABLE_SQL=ON -DUSE_REAL_ODBC_TEST_DRIVER=ON -DENABLE_AZURE=ON -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_RULE_MESSAGES=OFF -DSTRICT_GSL_CHECKS=AUDIT -DFAIL_ON_WARNINGS=ON .. 
&& cmake --build . --parallel 4 - name: test run: cd build && make test ARGS="--timeout 300 -j4 --output-on-failure" - name: linter diff --git a/cmake/BundledAzureSdkCpp.cmake b/cmake/BundledAzureSdkCpp.cmake index 939115a5c9..a2d72afa3a 100644 --- a/cmake/BundledAzureSdkCpp.cmake +++ b/cmake/BundledAzureSdkCpp.cmake @@ -16,13 +16,9 @@ # under the License. function(use_bundled_libazure SOURCE_DIR BINARY_DIR) - set(PATCH_FILE1 "${SOURCE_DIR}/thirdparty/azure-sdk-cpp-for-cpp/azure-sdk-for-cpp-old-compiler.patch") - set(PATCH_FILE2 "${SOURCE_DIR}/thirdparty/azure-sdk-cpp-for-cpp/fix-illegal-qualified-name-in-member.patch") - set(PC ${Bash_EXECUTABLE} -c "set -x &&\ - (\"${Patch_EXECUTABLE}\" -p1 -R -s -f --dry-run -i \"${PATCH_FILE1}\" || \"${Patch_EXECUTABLE}\" -p1 -N -i \"${PATCH_FILE1}\") &&\ - (\"${Patch_EXECUTABLE}\" -p1 -R -s -f --dry-run -i \"${PATCH_FILE2}\" || \"${Patch_EXECUTABLE}\" -p1 -N -i \"${PATCH_FILE2}\") ") - - + set(PATCH_FILE "${SOURCE_DIR}/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch") + set(PC ${Bash_EXECUTABLE} -c "set -x && \ + (\"${Patch_EXECUTABLE}\" -p1 -R -s -f --dry-run -i \"${PATCH_FILE}\" || \"${Patch_EXECUTABLE}\" -p1 -N -i \"${PATCH_FILE}\")") # Define byproducts if (WIN32) set(SUFFIX "lib") @@ -31,6 +27,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) set(AZURE_STORAGE_COMMON_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-common/${CMAKE_BUILD_TYPE}/${PREFIX}azure-storage-common.${SUFFIX}") set(AZURE_STORAGE_BLOBS_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-blobs/${CMAKE_BUILD_TYPE}/${PREFIX}azure-storage-blobs.${SUFFIX}") set(AZURE_IDENTITY_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/identity/azure-identity/${CMAKE_BUILD_TYPE}/${PREFIX}azure-identity.${SUFFIX}") + set(AZURE_STORAGE_FILES_DATALAKE_LIB 
"${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-files-datalake/${CMAKE_BUILD_TYPE}/${PREFIX}azure-storage-files-datalake.${SUFFIX}") else() set(SUFFIX "a") set(PREFIX "lib") @@ -38,13 +35,15 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) set(AZURE_STORAGE_COMMON_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-common/${PREFIX}azure-storage-common.${SUFFIX}") set(AZURE_STORAGE_BLOBS_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-blobs/${PREFIX}azure-storage-blobs.${SUFFIX}") set(AZURE_IDENTITY_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/identity/azure-identity/${PREFIX}azure-identity.${SUFFIX}") + set(AZURE_STORAGE_FILES_DATALAKE_LIB "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-files-datalake/${PREFIX}azure-storage-files-datalake.${SUFFIX}") endif() set(AZURESDK_LIBRARIES_LIST "${AZURE_CORE_LIB}" "${AZURE_STORAGE_COMMON_LIB}" "${AZURE_STORAGE_BLOBS_LIB}" - "${AZURE_IDENTITY_LIB}") + "${AZURE_IDENTITY_LIB}" + "${AZURE_STORAGE_FILES_DATALAKE_LIB}") set(AZURE_SDK_CMAKE_ARGS ${PASSTHROUGH_CMAKE_ARGS} -DWARNINGS_AS_ERRORS=OFF) @@ -54,7 +53,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) ExternalProject_Add( azure-sdk-cpp-external GIT_REPOSITORY "https://github.com/Azure/azure-sdk-for-cpp.git" - GIT_TAG "azure-storage-blobs_12.0.0-beta.7" + GIT_TAG "azure-storage-files-datalake_12.0.1" BUILD_IN_SOURCE true SOURCE_DIR "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src" BUILD_BYPRODUCTS "${AZURESDK_LIBRARIES_LIST}" @@ -66,7 +65,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) ) # Set dependencies - add_dependencies(azure-sdk-cpp-external-build CURL::libcurl LibXml2::LibXml2 OpenSSL::Crypto OpenSSL::SSL nlohmann_json::nlohmann_json) + add_dependencies(azure-sdk-cpp-external-build LibXml2::LibXml2 OpenSSL::Crypto OpenSSL::SSL) # Set variables set(LIBAZURE_FOUND "YES" CACHE STRING "" FORCE) @@ -87,10 +86,7 @@ function(use_bundled_libazure SOURCE_DIR 
BINARY_DIR) set_target_properties(AZURE::azure-core PROPERTIES IMPORTED_LOCATION "${AZURE_CORE_LIB}") add_dependencies(AZURE::azure-core azure-sdk-cpp-external-build) target_include_directories(AZURE::azure-core INTERFACE ${LIBAZURE_INCLUDE_DIRS}) - target_link_libraries(AZURE::azure-core INTERFACE LibXml2::LibXml2 CURL::libcurl OpenSSL::Crypto OpenSSL::SSL Threads::Threads nlohmann_json::nlohmann_json) - if (APPLE) - target_link_libraries(AZURE::azure-core INTERFACE "-framework CoreFoundation") - endif() + target_link_libraries(AZURE::azure-core INTERFACE OpenSSL::Crypto OpenSSL::SSL) if (WIN32) target_link_libraries(AZURE::azure-core INTERFACE winhttp.lib) endif() @@ -99,36 +95,20 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) set_target_properties(AZURE::azure-identity PROPERTIES IMPORTED_LOCATION "${AZURE_IDENTITY_LIB}") add_dependencies(AZURE::azure-identity azure-sdk-cpp-external-build) target_include_directories(AZURE::azure-identity INTERFACE ${LIBAZURE_INCLUDE_DIRS}) - target_link_libraries(AZURE::azure-identity INTERFACE LibXml2::LibXml2 CURL::libcurl OpenSSL::Crypto OpenSSL::SSL Threads::Threads nlohmann_json::nlohmann_json) - if (APPLE) - target_link_libraries(AZURE::azure-identity INTERFACE "-framework CoreFoundation") - endif() - if (WIN32) - target_link_libraries(AZURE::azure-identity INTERFACE winhttp.lib) - endif() add_library(AZURE::azure-storage-common STATIC IMPORTED) set_target_properties(AZURE::azure-storage-common PROPERTIES IMPORTED_LOCATION "${AZURE_STORAGE_COMMON_LIB}") add_dependencies(AZURE::azure-storage-common azure-sdk-cpp-external-build) target_include_directories(AZURE::azure-storage-common INTERFACE ${LIBAZURE_INCLUDE_DIRS}) - target_link_libraries(AZURE::azure-storage-common INTERFACE LibXml2::LibXml2 CURL::libcurl OpenSSL::Crypto OpenSSL::SSL Threads::Threads nlohmann_json::nlohmann_json) - if (APPLE) - target_link_libraries(AZURE::azure-storage-common INTERFACE "-framework CoreFoundation") - endif() - if (WIN32) - 
target_link_libraries(AZURE::azure-storage-common INTERFACE winhttp.lib) - endif() + target_link_libraries(AZURE::azure-storage-common INTERFACE LibXml2::LibXml2) add_library(AZURE::azure-storage-blobs STATIC IMPORTED) set_target_properties(AZURE::azure-storage-blobs PROPERTIES IMPORTED_LOCATION "${AZURE_STORAGE_BLOBS_LIB}") add_dependencies(AZURE::azure-storage-blobs azure-sdk-cpp-external-build) target_include_directories(AZURE::azure-storage-blobs INTERFACE ${LIBAZURE_INCLUDE_DIRS}) - target_link_libraries(AZURE::azure-storage-blobs INTERFACE LibXml2::LibXml2 CURL::libcurl OpenSSL::Crypto OpenSSL::SSL Threads::Threads nlohmann_json::nlohmann_json) - if (APPLE) - target_link_libraries(AZURE::azure-storage-blobs INTERFACE "-framework CoreFoundation") - endif() - if (WIN32) - target_link_libraries(AZURE::azure-storage-blobs INTERFACE winhttp.lib) - endif() - add_definitions("-DBUILD_CURL_HTTP_TRANSPORT_ADAPTER") + + add_library(AZURE::azure-storage-files-datalake STATIC IMPORTED) + set_target_properties(AZURE::azure-storage-files-datalake PROPERTIES IMPORTED_LOCATION "${AZURE_STORAGE_FILES_DATALAKE_LIB}") + add_dependencies(AZURE::azure-storage-files-datalake azure-sdk-cpp-external-build) + target_include_directories(AZURE::azure-storage-files-datalake INTERFACE ${LIBAZURE_INCLUDE_DIRS}) endfunction(use_bundled_libazure) diff --git a/extensions/azure/CMakeLists.txt b/extensions/azure/CMakeLists.txt index b8a8abcf43..93047438d4 100644 --- a/extensions/azure/CMakeLists.txt +++ b/extensions/azure/CMakeLists.txt @@ -32,7 +32,7 @@ target_include_directories(minifi-azure BEFORE PRIVATE ${CMAKE_SOURCE_DIR}/exten target_link_libraries(minifi-azure ${LIBMINIFI} Threads::Threads) target_link_libraries(minifi-azure CURL::libcurl LibXml2::LibXml2) -target_link_libraries(minifi-azure AZURE::azure-storage-blobs AZURE::azure-storage-common AZURE::azure-core) +target_link_libraries(minifi-azure AZURE::azure-storage-files-datalake AZURE::azure-storage-blobs 
AZURE::azure-storage-common AZURE::azure-core) if (WIN32) target_link_libraries(minifi-azure crypt32.lib bcrypt.lib) diff --git a/extensions/azure/storage/AzureBlobStorage.cpp b/extensions/azure/storage/AzureBlobStorage.cpp index 8be672986f..5d3fb46b04 100644 --- a/extensions/azure/storage/AzureBlobStorage.cpp +++ b/extensions/azure/storage/AzureBlobStorage.cpp @@ -60,17 +60,14 @@ std::optional AzureBlobStorage::uploadBlob(const std::string & try { auto blob_client = container_client_->GetBlockBlobClient(blob_name); auto response = blob_client.UploadFrom(buffer, buffer_size); - if (!response.HasValue()) { - return std::nullopt; - } UploadBlobResult result; result.length = buffer_size; result.primary_uri = container_client_->GetUrl(); - if (response->ETag.HasValue()) { - result.etag = response->ETag.ToString(); + if (response.Value.ETag.HasValue()) { + result.etag = response.Value.ETag.ToString(); } - result.timestamp = response->LastModified.GetString(Azure::Core::DateTime::DateFormat::Rfc1123); + result.timestamp = response.Value.LastModified.ToString(Azure::DateTime::DateFormat::Rfc1123); return result; } catch (const std::runtime_error& err) { logger_->log_error("A runtime error occurred while uploading blob: %s", err.what()); diff --git a/thirdparty/azure-sdk-cpp-for-cpp/azure-sdk-for-cpp-old-compiler.patch b/thirdparty/azure-sdk-cpp-for-cpp/azure-sdk-for-cpp-old-compiler.patch deleted file mode 100644 index c54af17e3f..0000000000 --- a/thirdparty/azure-sdk-cpp-for-cpp/azure-sdk-for-cpp-old-compiler.patch +++ /dev/null @@ -1,42 +0,0 @@ -diff -rupN orig/sdk/core/azure-core/inc/azure/core/context.hpp patched/sdk/core/azure-core/inc/azure/core/context.hpp ---- orig/sdk/core/azure-core/inc/azure/core/context.hpp 2021-02-03 09:06:18.580502882 +0000 -+++ patched/sdk/core/azure-core/inc/azure/core/context.hpp 2021-02-03 09:07:11.302899054 +0000 -@@ -255,7 +255,7 @@ namespace Azure { namespace Core { - struct ContextSharedState - { - std::shared_ptr Parent; -- 
std::atomic_int64_t CancelAtMsecSinceEpoch; -+ std::atomic CancelAtMsecSinceEpoch; - std::string Key; - ContextValue Value; - -diff -rupN orig/sdk/core/azure-core/src/http/policy.cpp patched/sdk/core/azure-core/src/http/policy.cpp ---- orig/sdk/core/azure-core/src/http/policy.cpp 2021-02-03 09:10:44.454678199 +0000 -+++ patched/sdk/core/azure-core/src/http/policy.cpp 2021-02-03 09:11:15.535238932 +0000 -@@ -10,10 +10,10 @@ using namespace Azure::Core::Http; - #ifndef _MSC_VER - // Non-MSVC compilers do require allocation of statics, even if they are const constexpr. - // MSVC, on the other hand, has problem if you "redefine" static constexprs. --Azure::Core::Logging::LogClassification const Azure::Core::Http::LogClassification::Request; --Azure::Core::Logging::LogClassification const Azure::Core::Http::LogClassification::Response; --Azure::Core::Logging::LogClassification const Azure::Core::Http::LogClassification::Retry; --Azure::Core::Logging::LogClassification const -+constexpr Azure::Core::Logging::LogClassification const Azure::Core::Http::LogClassification::Request; -+constexpr Azure::Core::Logging::LogClassification const Azure::Core::Http::LogClassification::Response; -+constexpr Azure::Core::Logging::LogClassification const Azure::Core::Http::LogClassification::Retry; -+constexpr Azure::Core::Logging::LogClassification const - Azure::Core::Http::LogClassification::HttpTransportAdapter; - #endif - -diff -rupN orig/sdk/keyvault/azure-security-keyvault-keys/src/key_client.cpp patched/sdk/keyvault/azure-security-keyvault-keys/src/key_client.cpp ---- orig/sdk/keyvault/azure-security-keyvault-keys/src/key_client.cpp 2021-02-10 10:35:03.305252930 +0100 -+++ patched/sdk/keyvault/azure-security-keyvault-keys/src/key_client.cpp 2021-02-10 16:11:25.139169400 +0100 -@@ -16,7 +16,7 @@ using namespace Azure::Core::Http; - - KeyClient::KeyClient( - std::string const& vaultUrl, -- std::shared_ptr credential, -+ std::shared_ptr credential, - KeyClientOptions options) - { - 
auto apiVersion = options.GetVersionString(); diff --git a/thirdparty/azure-sdk-cpp-for-cpp/fix-illegal-qualified-name-in-member.patch b/thirdparty/azure-sdk-cpp-for-cpp/fix-illegal-qualified-name-in-member.patch deleted file mode 100644 index 938da9000a..0000000000 --- a/thirdparty/azure-sdk-cpp-for-cpp/fix-illegal-qualified-name-in-member.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff -rupN orig/sdk/core/azure-core/inc/azure/core/http/winhttp/win_http_client.hpp patched/sdk/core/azure-core/inc/azure/core/http/winhttp/win_http_client.hpp ---- orig/sdk/core/azure-core/inc/azure/core/http/winhttp/win_http_client.hpp 2021-06-07 16:51:23.179818286 +0200 -+++ patched/sdk/core/azure-core/inc/azure/core/http/winhttp/win_http_client.hpp 2021-06-07 16:51:55.536150585 +0200 -@@ -132,8 +132,8 @@ namespace Azure { namespace Core { names - void CreateRequestHandle(std::unique_ptr& handleManager); - void Upload(std::unique_ptr& handleManager); - void SendRequest(std::unique_ptr& handleManager); -- void WinHttpTransport::ReceiveResponse(std::unique_ptr& handleManager); -- int64_t WinHttpTransport::GetContentLength( -+ void ReceiveResponse(std::unique_ptr& handleManager); -+ int64_t GetContentLength( - std::unique_ptr& handleManager, - HttpMethod requestMethod, - HttpStatusCode responseStatusCode); diff --git a/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch b/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch new file mode 100644 index 0000000000..6050900028 --- /dev/null +++ b/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch @@ -0,0 +1,14 @@ +# Issue presented in https://github.com/Azure/azure-sdk-for-cpp/issues/2560 +diff --git a/sdk/core/azure-core/CMakeLists.txt b/sdk/core/azure-core/CMakeLists.txt +index 12f57af0..1d8f3398 100644 +--- a/sdk/core/azure-core/CMakeLists.txt ++++ b/sdk/core/azure-core/CMakeLists.txt +@@ -142,7 +142,7 @@ if(WIN32) + target_link_libraries(azure-core PRIVATE bcrypt crypt32) + else() + find_package(OpenSSL 
REQUIRED) +- target_link_libraries(azure-core PRIVATE OpenSSL::SSL) ++ target_link_libraries(azure-core PUBLIC OpenSSL::SSL) + endif() + + if(BUILD_TRANSPORT_CURL) From 83a0b9b7c4d137194842476b0e1a78a2ed985a64 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 28 Jul 2021 16:13:48 +0200 Subject: [PATCH 02/36] Remove unnecessary nlohmann json library --- CMakeLists.txt | 3 -- LICENSE | 25 ------------- NOTICE | 1 - cmake/NlohmannJson.cmake | 35 ------------------- .../dummy/Findnlohmann_json.cmake | 27 -------------- 5 files changed, 91 deletions(-) delete mode 100644 cmake/NlohmannJson.cmake delete mode 100644 cmake/nlohmann_json/dummy/Findnlohmann_json.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index ca0b484b87..88f3443ce9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,9 +581,6 @@ endif() ## Azure Extensions if (ENABLE_ALL OR ENABLE_AZURE) - include(NlohmannJson) - list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/nlohmann_json/dummy") - include(BundledAzureSdkCpp) use_bundled_libazure(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) createExtension(AZURE-EXTENSIONS "AZURE EXTENSIONS" "This enables Azure support" "extensions/azure" "${TEST_DIR}/azure-tests") diff --git a/LICENSE b/LICENSE index 848e135be8..8007158eb6 100644 --- a/LICENSE +++ b/LICENSE @@ -2924,31 +2924,6 @@ SOFTWARE -------------------------------------------------------------------------- -This product bundles 'JSON for Modern C++' which is available under a MIT license: -MIT License - -Copyright (c) 2013-2021 Niels Lohmann - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - 
-The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------- - This project bundles 'range-v3' which is available under the Boost Software License. It includes code from a number of other projects, all of which have their own BSD-like or MIT licenses. diff --git a/NOTICE b/NOTICE index 0a875f60db..b8cf4a215d 100644 --- a/NOTICE +++ b/NOTICE @@ -57,7 +57,6 @@ This software includes third party software subject to the following copyrights: - libsodium - Copyright (c) 2013 - 2018 Frank Denis under the ISC software license - IANA timezone database - public domain - date (HowardHinnant/date) - notices below -- JSON for Modern C++ (nlohmann/json) - Copyright (c) 2013-2021 Niels Lohmann - range-v3 - Eric Niebler and other contributors The licenses for these third party components are included in LICENSE.txt diff --git a/cmake/NlohmannJson.cmake b/cmake/NlohmannJson.cmake deleted file mode 100644 index 3b1f38111f..0000000000 --- a/cmake/NlohmannJson.cmake +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -include(FetchContent) - -FetchContent_Declare(nlohmann_json - GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent - GIT_TAG "v3.9.1") - -FetchContent_MakeAvailable(nlohmann_json) - -FetchContent_GetProperties(nlohmann_json) -if(NOT nlohmann_json_POPULATED) - FetchContent_Populate(nlohmann_json) - add_subdirectory(${nlohmann_json_SOURCE_DIR} ${nlohmann_json_BINARY_DIR} EXCLUDE_FROM_ALL) -endif() - -set(NLOHMANN_JSON_INCLUDE_DIR "${nlohmann_json_SOURCE_DIR}/include") - -# Set exported variables for FindPackage.cmake -set(PASSTHROUGH_VARIABLES ${PASSTHROUGH_VARIABLES} "-DEXPORTED_NLOHMANN_JSON_INCLUDE_DIR=${NLOHMANN_JSON_INCLUDE_DIR}" CACHE STRING "" FORCE) diff --git a/cmake/nlohmann_json/dummy/Findnlohmann_json.cmake b/cmake/nlohmann_json/dummy/Findnlohmann_json.cmake deleted file mode 100644 index 6a3cb1b332..0000000000 --- a/cmake/nlohmann_json/dummy/Findnlohmann_json.cmake +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if(NOT NLOHMANN_JSON_FOUND) - set(NLOHMANN_JSON_FOUND "YES" CACHE STRING "" FORCE) - set(NLOHMANN_JSON_INCLUDE_DIR "${EXPORTED_NLOHMANN_JSON_INCLUDE_DIR}" CACHE STRING "" FORCE) - set(NLOHMANN_JSON_INCLUDE_DIRS "${EXPORTED_NLOHMANN_JSON_INCLUDE_DIR}" CACHE STRING "" FORCE) -endif() - -if(NOT TARGET nlohmann_json::nlohmann_json) - add_library(nlohmann_json::nlohmann_json STATIC IMPORTED) - set_property(TARGET nlohmann_json::nlohmann_json APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${NLOHMANN_JSON_INCLUDE_DIR}") -endif() From 92f640051d897cd256f1b29dd2d3f3e5e106658b Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 29 Jul 2021 10:44:57 +0200 Subject: [PATCH 03/36] Implement PutAzureDataLakeStorage processor --- cmake/BundledAzureSdkCpp.cmake | 1 + .../processors/PutAzureDataLakeStorage.cpp | 178 ++++++++++++++++++ .../processors/PutAzureDataLakeStorage.h | 130 +++++++++++++ .../azure/storage/AzureDataLakeStorage.cpp | 57 ++++++ .../azure/storage/AzureDataLakeStorage.h | 67 +++++++ .../storage/AzureDataLakeStorageClient.cpp | 72 +++++++ .../storage/AzureDataLakeStorageClient.h | 57 ++++++ .../azure/storage/DataLakeStorageClient.h | 51 +++++ 8 files changed, 613 insertions(+) create mode 100644 extensions/azure/processors/PutAzureDataLakeStorage.cpp create mode 100644 extensions/azure/processors/PutAzureDataLakeStorage.h create mode 100644 extensions/azure/storage/AzureDataLakeStorage.cpp create mode 100644 extensions/azure/storage/AzureDataLakeStorage.h create mode 100644 extensions/azure/storage/AzureDataLakeStorageClient.cpp create mode 100644 
extensions/azure/storage/AzureDataLakeStorageClient.h create mode 100644 extensions/azure/storage/DataLakeStorageClient.h diff --git a/cmake/BundledAzureSdkCpp.cmake b/cmake/BundledAzureSdkCpp.cmake index a2d72afa3a..ec169c606b 100644 --- a/cmake/BundledAzureSdkCpp.cmake +++ b/cmake/BundledAzureSdkCpp.cmake @@ -74,6 +74,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-blobs/inc/" "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-common/inc/" "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/identity/azure-identity/inc/" + "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src/sdk/storage/azure-storage-files-datalake/inc/" CACHE STRING "" FORCE) set(LIBAZURE_LIBRARIES ${AZURESDK_LIBRARIES_LIST} CACHE STRING "" FORCE) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp new file mode 100644 index 0000000000..9e710a68ef --- /dev/null +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -0,0 +1,178 @@ +/** + * @file PutAzureDataLakeStorage.cpp + * PutAzureDataLakeStorage class implementation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PutAzureDataLakeStorage.h" + +#include "utils/ProcessorConfigUtils.h" +#include "controllerservices/AzureStorageCredentialsService.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace processors { + +const std::set PutAzureDataLakeStorage::CONFLICT_RESOLUTION_STRATEGIES({"fail", "replace", "ignore"}); + +const core::Property PutAzureDataLakeStorage::AzureStorageCredentialsService( + core::PropertyBuilder::createProperty("Azure Storage Credentials Service") + ->withDescription("Name of the Azure Storage Credentials Service used to retrieve the connection string from.") + ->isRequired(true) + ->build()); +const core::Property PutAzureDataLakeStorage::FilesystemName( + core::PropertyBuilder::createProperty("Filesystem Name") + ->withDescription("Name of the Azure Storage File System. It is assumed to be already existing.") + ->supportsExpressionLanguage(true) + ->isRequired(true) + ->build()); +const core::Property PutAzureDataLakeStorage::DirectoryName( + core::PropertyBuilder::createProperty("Directory Name") + ->withDescription("Name of the Azure Storage Directory. The Directory Name cannot contain a leading '/'. " + "The root directory can be designated by the empty string value. 
In case of the PutAzureDataLakeStorage processor, the directory will be created if not already existing.") + ->supportsExpressionLanguage(true) + ->isRequired(true) + ->build()); +const core::Property PutAzureDataLakeStorage::FileName( + core::PropertyBuilder::createProperty("File Name") + ->withDescription("The filename") + ->supportsExpressionLanguage(true) + ->build()); +const core::Property PutAzureDataLakeStorage::ConflictResolutionStrategy( + core::PropertyBuilder::createProperty("Conflict Resolution Strategy") + ->withDescription("Indicates what should happen when a file with the same name already exists in the output directory") + ->supportsExpressionLanguage(true) + ->isRequired(true) + ->withDefaultValue("fail") + ->withAllowableValues(CONFLICT_RESOLUTION_STRATEGIES) + ->build()); + +const core::Relationship PutAzureDataLakeStorage::Success("success", "Files that have been successfully written to Azure storage are transferred to this relationship"); +const core::Relationship PutAzureDataLakeStorage::Failure("failure", "Files that could not be written to Azure storage for some reason are transferred to this relationship"); + +void PutAzureDataLakeStorage::initialize() { + // Set the supported properties + setSupportedProperties({ + AzureStorageCredentialsService, + FilesystemName, + DirectoryName, + FileName, + ConflictResolutionStrategy + }); + // Set the supported relationships + setSupportedRelationships({ + Success, + Failure + }); +} + +std::string PutAzureDataLakeStorage::getConnectionStringFromControllerService(const std::shared_ptr &context) const { + std::string service_name; + if (!context->getProperty(AzureStorageCredentialsService.getName(), service_name) || service_name.empty()) { + return ""; + } + + auto service = context->getControllerService(service_name); + if (nullptr == service) { + logger_->log_error("Azure Storage credentials service with name: '%s' could not be found", service_name.c_str()); + return ""; + } + + auto 
azure_credentials_service = std::dynamic_pointer_cast(service); + if (!azure_credentials_service) { + logger_->log_error("Controller service with name: '%s' is not an Azure Storage credentials service", service_name.c_str()); + return ""; + } + + return azure_credentials_service->getConnectionString(); +} + +void PutAzureDataLakeStorage::onSchedule(const std::shared_ptr& context, const std::shared_ptr& /*sessionFactory*/) { + std::string value; + if (!context->getProperty(FilesystemName.getName(), value) || value.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Filesystem Name property missing or invalid"); + } + + if (!context->getProperty(DirectoryName.getName(), value) || value.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Directory Name property missing or invalid"); + } + + connection_string_ = getConnectionStringFromControllerService(context); + if (connection_string_.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Azure Storage Credentials Service property missing or invalid"); + } + + conflict_resolution_strategy_ = utils::parsePropertyWithAllowableValuesOrThrow(*context, ConflictResolutionStrategy.getName(), CONFLICT_RESOLUTION_STRATEGIES); +} + +void PutAzureDataLakeStorage::onTrigger(const std::shared_ptr& context, const std::shared_ptr& session) { + logger_->log_debug("PutAzureDataLakeStorage onTrigger"); + std::shared_ptr flow_file = session->get(); + if (!flow_file) { + context->yield(); + return; + } + + storage::PutAzureDataLakeStorageParameters params; + params.connection_string = connection_string_; + params.replace_file = conflict_resolution_strategy_ == "replace"; + + if (!context->getProperty(FilesystemName, params.file_system_name, flow_file) || params.file_system_name.empty()) { + logger_->log_error("Filesystem Name '%s' is invalid or empty!", params.file_system_name); + session->transfer(flow_file, Failure); + return; + } + + if (!context->getProperty(DirectoryName, params.directory_name, flow_file) || 
params.directory_name.empty()) { + logger_->log_error("Directory Name '%s' is invalid or empty!", params.directory_name); + session->transfer(flow_file, Failure); + return; + } + + context->getProperty(FileName, params.filename, flow_file); + if (params.filename.empty() && (!flow_file->getAttribute("filename", params.filename) || params.filename.empty())) { + logger_->log_error("No File Name is set and default object key 'filename' attribute could not be found!"); + session->transfer(flow_file, Failure); + return; + } + + PutAzureDataLakeStorage::ReadCallback callback(flow_file->getSize(), azure_data_lake_storage_, params); + session->read(flow_file, &callback); + auto result = callback.getResult(); + if (result == std::nullopt) { + logger_->log_error("Failed to upload file '%s' to Azura Data Lake storage", params.filename); + session->transfer(flow_file, Failure); + } else { + session->putAttribute(flow_file, "azure.filesystem", params.file_system_name); + session->putAttribute(flow_file, "azure.directory", params.directory_name); + session->putAttribute(flow_file, "azure.filename", params.filename); + session->putAttribute(flow_file, "azure.primaryUri", result->primary_uri); + session->putAttribute(flow_file, "azure.length", std::to_string(result->length)); + logger_->log_debug("Successfully uploaded file '%s' to Azura Data Lake storage", params.filename); + session->transfer(flow_file, Success); + } +} + +} // namespace processors +} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h new file mode 100644 index 0000000000..9268a19afc --- /dev/null +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -0,0 +1,130 @@ +/** + * @file PutAzureDataLakeStorage.h + * PutAzureDataLakeStorage class declaration + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "core/Property.h" +#include "core/Processor.h" +#include "core/logging/Logger.h" +#include "core/logging/LoggerConfiguration.h" +#include "storage/AzureDataLakeStorage.h" +#include "storage/AzureDataLakeStorageClient.h" + +class PutAzureDataLakeStorageTestsFixture; + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace processors { + +class PutAzureDataLakeStorage final : public core::Processor { + public: + static const std::set CONFLICT_RESOLUTION_STRATEGIES; + + static constexpr char const* ProcessorName = "PutAzureDataLakeStorage"; + + // Supported Properties + static const core::Property AzureStorageCredentialsService; + static const core::Property FilesystemName; + static const core::Property DirectoryName; + static const core::Property FileName; + static const core::Property ConflictResolutionStrategy; + + // Supported Relationships + static const core::Relationship Failure; + static const core::Relationship Success; + + explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid = minifi::utils::Identifier()) + : core::Processor(name, uuid) { + } + + 
~PutAzureDataLakeStorage() override = default; + + void initialize() override; + void onSchedule(const std::shared_ptr &context, const std::shared_ptr &sessionFactory) override; + void onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) override; + + private: + class ReadCallback : public InputStreamCallback { + public: + ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params) + : flow_size_(flow_size) + , azure_data_lake_storage_(azure_data_lake_storage) + , params_(params) { + } + + int64_t process(const std::shared_ptr& stream) override { + std::vector buffer; + int read_ret = stream->read(buffer, flow_size_); + if (read_ret < 0) { + return -1; + } + + result_ = azure_data_lake_storage_.uploadFile(params_, buffer.data(), flow_size_); + return read_ret; + } + + utils::optional getResult() const { + return result_; + } + + private: + uint64_t flow_size_; + storage::AzureDataLakeStorage& azure_data_lake_storage_; + const storage::PutAzureDataLakeStorageParameters& params_; + std::optional result_ = std::nullopt; + }; + + core::annotation::Input getInputRequirement() const override { + return core::annotation::Input::INPUT_REQUIRED; + } + + friend class PutAzureDataLakeStorageTestsFixture; + + explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid, std::unique_ptr data_lake_storage_client) + : core::Processor(name, uuid), + azure_data_lake_storage_(std::move(data_lake_storage_client)) { + } + + std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; + + std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; + std::string connection_string_; + std::string conflict_resolution_strategy_; + storage::AzureDataLakeStorage azure_data_lake_storage_; +}; + +REGISTER_RESOURCE(PutAzureDataLakeStorage, "Puts content into an Azure Data Lake Storage Gen 2"); + +} // namespace processors 
+} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp new file mode 100644 index 0000000000..44da7b8412 --- /dev/null +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -0,0 +1,57 @@ +/** + * @file AzureDataLakeStorage.cpp + * AzureDataLakeStorage class implementation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "AzureDataLakeStorage.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace storage { + +std::optional AzureDataLakeStorage::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { + auto file_created = data_lake_storage_client_->createFile(params); + if (!file_created) { + return std::nullopt; + } + if (!file_created.value() && !params.replace_file) { + std::string message = "File " + params.filename + " already exists on Azure Data Lake Storage"; + logger_->log_error(message.c_str()); + throw FileAlreadyExistsException("File " + params.filename + " already exists on Azure Data Lake Storage"); + } + + auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); + if (!upload_url) { + return std::nullopt; + } + + UploadDataLakeStorageResult result; + result.length = buffer_size; + result.primary_uri = upload_url.value(); + return result; +} + +} // namespace storage +} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h new file mode 100644 index 0000000000..5b1b901450 --- /dev/null +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -0,0 +1,67 @@ +/** + * @file AzureDataLakeStorage.h + * AzureDataLakeStorage class declaration + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include "core/logging/Logger.h" +#include "core/logging/LoggerConfiguration.h" +#include "AzureDataLakeStorageClient.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace storage { + +struct UploadDataLakeStorageResult { + std::string primary_uri; + std::size_t length; +}; + +class AzureDataLakeStorage { + public: + class FileAlreadyExistsException : public std::runtime_error { + public: + explicit FileAlreadyExistsException(const std::string& msg) : std::runtime_error(msg) {} + }; + + AzureDataLakeStorage() : data_lake_storage_client_(std::make_unique()) {} + explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client) : data_lake_storage_client_(std::move(data_lake_storage_client)) { + } + + std::optional uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); + + private: + std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; + std::unique_ptr data_lake_storage_client_; +}; + +} // namespace storage +} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp new file mode 100644 index 0000000000..4b702f7cb2 --- /dev/null +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -0,0 +1,72 @@ +/** + * @file AzureDataLakeStorageClient.cpp + * 
AzureDataLakeStorageClient class implementation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AzureDataLakeStorageClient.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace storage { + +void AzureDataLakeStorageClient::resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name) { + if (client_ == nullptr || connection_string != connection_string || file_system_name_ != file_system_name) { + client_ = std::make_unique( + Azure::Storage::Files::DataLake::DataLakeFileSystemClient::CreateFromConnectionString(connection_string, file_system_name)); + file_system_name_ = file_system_name; + connection_string_ = connection_string; + } +} + +std::optional AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParameters& params) { + try { + resetClientIfNeeded(params.connection_string, params.file_system_name); + auto directory_client = client_->GetDirectoryClient(params.directory_name); + directory_client.CreateIfNotExists(); + auto file_client = directory_client.GetFileClient(params.filename); + auto response = file_client.CreateIfNotExists(); + return response.Value.Created; + } catch (const 
std::runtime_error& err) { + logger_->log_error("A runtime error occurred while creating file in Data Lake storage: %s", err.what()); + return std::nullopt; + } +} + +std::optional AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { + try { + resetClientIfNeeded(params.connection_string, params.file_system_name); + auto directory_client = client_->GetDirectoryClient(params.directory_name); + directory_client.CreateIfNotExists(); + auto file_client = directory_client.GetFileClient(params.filename); + file_client.UploadFrom(buffer, buffer_size); + return file_client.GetUrl(); + } catch (const std::runtime_error& err) { + logger_->log_error("A runtime error occurred while uploading blob: %s", err.what()); + return std::nullopt; + } +} + +} // namespace storage +} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h new file mode 100644 index 0000000000..e65ca447a2 --- /dev/null +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -0,0 +1,57 @@ +/** + * @file AzureDataLakeStorageClient.h + * AzureDataLakeStorageClient class declaration + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include "DataLakeStorageClient.h" +#include "core/logging/Logger.h" +#include "core/logging/LoggerConfiguration.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace storage { + +class AzureDataLakeStorageClient : public DataLakeStorageClient { + public: + std::optional createFile(const PutAzureDataLakeStorageParameters& params) override; + std::optional uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override; + + private: + void resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name); + + std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; + std::string connection_string_; + std::string file_system_name_; + std::unique_ptr client_; +}; + +} // namespace storage +} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/extensions/azure/storage/DataLakeStorageClient.h b/extensions/azure/storage/DataLakeStorageClient.h new file mode 100644 index 0000000000..01c4b50e95 --- /dev/null +++ b/extensions/azure/storage/DataLakeStorageClient.h @@ -0,0 +1,51 @@ +/** + * @file DataLakeStorageClient.h + * DataLakeStorageClient class declaration + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace azure { +namespace storage { + +struct PutAzureDataLakeStorageParameters { + std::string connection_string; + std::string file_system_name; + std::string directory_name; + std::string filename; + bool replace_file = false; +}; + +class DataLakeStorageClient { + public: + virtual std::optional createFile(const PutAzureDataLakeStorageParameters& params) = 0; + virtual std::optional uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) = 0; +}; + +} // namespace storage +} // namespace azure +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org From 7b507aa334154b58a9b6e303286c92e7646aa035 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Mon, 2 Aug 2021 16:19:29 +0200 Subject: [PATCH 04/36] Add tests for PutAzureDataLakeStorage processor --- .../processors/PutAzureDataLakeStorage.cpp | 9 - .../processors/PutAzureDataLakeStorage.h | 4 +- .../PutAzureDataLakeStorageTests.cpp | 232 ++++++++++++++++++ 3 files changed, 234 insertions(+), 11 deletions(-) create mode 100644 libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 9e710a68ef..3fe2c52a86 
100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -105,15 +105,6 @@ std::string PutAzureDataLakeStorage::getConnectionStringFromControllerService(co } void PutAzureDataLakeStorage::onSchedule(const std::shared_ptr& context, const std::shared_ptr& /*sessionFactory*/) { - std::string value; - if (!context->getProperty(FilesystemName.getName(), value) || value.empty()) { - throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Filesystem Name property missing or invalid"); - } - - if (!context->getProperty(DirectoryName.getName(), value) || value.empty()) { - throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Directory Name property missing or invalid"); - } - connection_string_ = getConnectionStringFromControllerService(context); if (connection_string_.empty()) { throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Azure Storage Credentials Service property missing or invalid"); diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index 9268a19afc..9b34b9e10d 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -71,6 +71,8 @@ class PutAzureDataLakeStorage final : public core::Processor { void onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) override; private: + friend class ::PutAzureDataLakeStorageTestsFixture; + class ReadCallback : public InputStreamCallback { public: ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params) @@ -105,8 +107,6 @@ class PutAzureDataLakeStorage final : public core::Processor { return core::annotation::Input::INPUT_REQUIRED; } - friend class PutAzureDataLakeStorageTestsFixture; - explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid, std::unique_ptr data_lake_storage_client) : 
core::Processor(name, uuid), azure_data_lake_storage_(std::move(data_lake_storage_client)) { diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp new file mode 100644 index 0000000000..00cb8a5e5d --- /dev/null +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -0,0 +1,232 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "../TestBase.h"
+#include "utils/IntegrationTestUtils.h"
+#include "core/Processor.h"
+#include "processors/PutAzureDataLakeStorage.h"
+#include "processors/GetFile.h"
+#include "processors/PutFile.h"
+#include "processors/LogAttribute.h"
+#include "processors/UpdateAttribute.h"
+#include "storage/DataLakeStorageClient.h"
+#include "utils/file/FileUtils.h"
+#include "controllerservices/AzureStorageCredentialsService.h"
+
+using namespace std::chrono_literals;
+
+const std::string FILESYSTEM_NAME = "testfilesystem";
+const std::string DIRECTORY_NAME = "testdir";
+const std::string FILE_NAME = "testfile.txt";
+const std::string CONNECTION_STRING = "test-connectionstring";
+const std::string TEST_DATA = "data123";
+const std::string GETFILE_FILE_NAME = "input_data.log";
+// Test double for the storage client: records what it was called with and can be told to fail.
+class MockDataLakeStorageClient : public minifi::azure::storage::DataLakeStorageClient {
+ public:
+  const std::string PRIMARY_URI = "test-uri";
+
+  std::optional createFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& /*params*/) override {
+    return file_creation_error_ ? std::nullopt : std::make_optional(create_file_);  // nullopt simulates a client error
+  }
+
+  std::optional uploadFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override {
+    input_data_ = std::string(buffer, buffer + buffer_size);  // keep a copy of the uploaded bytes
+    params_ = params;  // keep the parameters so tests can inspect them via getPassedParams()
+
+    return upload_fails_ ? std::nullopt : std::make_optional(PRIMARY_URI);
+  }
+
+  void setFileCreation(bool create_file) {  // false makes createFile report "not newly created"
+    create_file_ = create_file;
+  }
+
+  void setFileCreationError(bool file_creation_error) {
+    file_creation_error_ = file_creation_error;
+  }
+
+  void setUploadFailure(bool upload_fails) {
+    upload_fails_ = upload_fails;
+  }
+
+  minifi::azure::storage::PutAzureDataLakeStorageParameters getPassedParams() const {
+    return params_;
+  }
+
+ private:
+  bool create_file_ = true;
+  bool file_creation_error_ = false;
+  bool upload_fails_ = false;
+  std::string input_data_;
+  minifi::azure::storage::PutAzureDataLakeStorageParameters params_;
+};
+// Fixture that builds a test plan around a PutAzureDataLakeStorage processor using the mock client.
+class PutAzureDataLakeStorageTestsFixture {
+ public:
+  PutAzureDataLakeStorageTestsFixture() {
+    LogTestController::getInstance().setDebug();
+    LogTestController::getInstance().setDebug();
+    LogTestController::getInstance().setTrace();
+    LogTestController::getInstance().setTrace();
+    LogTestController::getInstance().setTrace();
+    LogTestController::getInstance().setDebug();
+    LogTestController::getInstance().setDebug();
+    LogTestController::getInstance().setTrace();
+
+    // Build MiNiFi processing graph
+    plan_ = test_controller_.createPlan();
+    auto mock_data_lake_storage_client = std::make_unique();
+    mock_data_lake_storage_client_ptr_ = mock_data_lake_storage_client.get();  // non-owning handle kept before ownership moves to the processor
+    put_azure_data_lake_storage_ = std::shared_ptr(
+      new minifi::azure::processors::PutAzureDataLakeStorage("PutAzureDataLakeStorage", utils::Identifier(), std::move(mock_data_lake_storage_client)));
+    auto input_dir = test_controller_.createTempDirectory("/tmp/gt.XXXXXX").str();
+    std::ofstream input_file_stream(input_dir + utils::file::FileUtils::get_separator() + GETFILE_FILE_NAME);
+    input_file_stream << TEST_DATA;
+    input_file_stream.close();
+
+    get_file_ = plan_->addProcessor("GetFile", "GetFile");
+    plan_->setProperty(get_file_, processors::GetFile::Directory.getName(), input_dir);
+    plan_->setProperty(get_file_, processors::GetFile::KeepSourceFile.getName(), "false");
+
+    update_attribute_ = plan_->addProcessor("UpdateAttribute", "UpdateAttribute", { {"success", "d"} }, true);
+    plan_->addProcessor(put_azure_data_lake_storage_, "PutAzureDataLakeStorage", { {"success", "d"}, {"failure", "d"} }, true);
+    auto logattribute = plan_->addProcessor("LogAttribute", "LogAttribute", { {"success", "d"} }, true);
+    logattribute->setAutoTerminatedRelationships({{"success", "d"}});
+
+    putfile_ = plan_->addProcessor("PutFile", "PutFile", { {"success", "d"} }, false);
+    plan_->addConnection(put_azure_data_lake_storage_, {"failure", "d"}, putfile_);  // failed flow files end up in output_dir_
+    putfile_->setAutoTerminatedRelationships({{"success", "d"}});
+    putfile_->setAutoTerminatedRelationships({{"failure", "d"}});  // NOTE(review): this second call may replace the first one - confirm
+    output_dir_ = test_controller_.createTempDirectory("/tmp/gt.XXXXXX").str();
+    plan_->setProperty(putfile_, org::apache::nifi::minifi::processors::PutFile::Directory.getName(), output_dir_);
+
+    azure_storage_cred_service_ = plan_->addController("AzureStorageCredentialsService", "AzureStorageCredentialsService");
+    setDefaultProperties();
+  }
+
+  std::vector getFailedFlowFileContents() {  // returns the content of every file listed in output_dir_
+    std::vector file_contents;
+
+    auto lambda = [&file_contents](const std::string& path, const std::string& filename) -> bool {
+      std::ifstream is(path + utils::file::FileUtils::get_separator() + filename, std::ifstream::binary);
+      std::string file_content((std::istreambuf_iterator(is)), std::istreambuf_iterator());
+      file_contents.push_back(file_content);
+      return true;
+    };
+
+    utils::file::FileUtils::list_dir(output_dir_, lambda, plan_->getLogger(), false);
+    return file_contents;
+  }
+
+  void setDefaultProperties() {  // filesystem and directory names are supplied through attributes set by UpdateAttribute
+    plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::AzureStorageCredentialsService.getName(), "AzureStorageCredentialsService");
+    plan_->setProperty(update_attribute_, "test.filesystemname", FILESYSTEM_NAME, true);
+    plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::FilesystemName.getName(), "${test.filesystemname}");
+    plan_->setProperty(update_attribute_, "test.directoryname", DIRECTORY_NAME, true);
+    plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::DirectoryName.getName(), "${test.directoryname}");
+    plan_->setProperty(azure_storage_cred_service_, minifi::azure::controllers::AzureStorageCredentialsService::ConnectionString.getName(), CONNECTION_STRING);
+  }
+
+  virtual ~PutAzureDataLakeStorageTestsFixture() {
+    LogTestController::getInstance().reset();
+  }
+
+ protected:
+  TestController test_controller_;
+  std::shared_ptr plan_;
+  MockDataLakeStorageClient* mock_data_lake_storage_client_ptr_;  // non-owning; the mock is owned by the processor under test
+  std::shared_ptr put_azure_data_lake_storage_;
+  std::shared_ptr get_file_;
+  std::shared_ptr update_attribute_;
+  std::shared_ptr putfile_;
+  std::shared_ptr azure_storage_cred_service_;
+  std::string output_dir_;
+};
+// An empty credentials service name must make the session throw, with no flow file written to the failure directory.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Azure storage credentials service is empty", "[azureDataLakeStorageParameters]") {
+  plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::AzureStorageCredentialsService.getName(), "");
+  REQUIRE_THROWS_AS(test_controller_.runSession(plan_, true), minifi::Exception);
+  REQUIRE(getFailedFlowFileContents().size() == 0);
+}
+// An empty filesystem name is logged and the flow file content is found in the failure directory.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Filesystem name is not set", "[azureDataLakeStorageParameters]") {
+  plan_->setProperty(update_attribute_, "test.filesystemname", "", true);
+  test_controller_.runSession(plan_, true);
+  using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime;
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "Filesystem Name '' is invalid or empty!"));
+  auto failed_flowfiles = getFailedFlowFileContents();
+  REQUIRE(failed_flowfiles.size() == 1);
+  REQUIRE(failed_flowfiles[0] == TEST_DATA);
+}
+// An empty directory name is logged and the flow file content is found in the failure directory.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Directory name is not set", "[azureDataLakeStorageParameters]") {
+  plan_->setProperty(update_attribute_, "test.directoryname", "", true);
+  test_controller_.runSession(plan_, true);
+  using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime;
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "Directory Name '' is invalid or empty!"));
+  auto failed_flowfiles = getFailedFlowFileContents();
+  REQUIRE(failed_flowfiles.size() == 1);
+  REQUIRE(failed_flowfiles[0] == TEST_DATA);
+}
+// An empty connection string in the credentials service must make the session throw.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Connection String is empty", "[azureDataLakeStorageParameters]") {
+  plan_->setProperty(azure_storage_cred_service_, minifi::azure::controllers::AzureStorageCredentialsService::ConnectionString.getName(), "");
+  REQUIRE_THROWS_AS(test_controller_.runSession(plan_, true), minifi::Exception);
+  REQUIRE(getFailedFlowFileContents().size() == 0);
+}
+// Happy path: the azure.* attributes are logged and the mock received the expected parameters.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Upload to Azure Data Lake Storage with default parameters", "[azureDataLakeStorageUpload]") {
+  test_controller_.runSession(plan_, true);
+  REQUIRE(getFailedFlowFileContents().size() == 0);
+  using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime;
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.directory value:" + DIRECTORY_NAME));
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filename value:" + GETFILE_FILE_NAME));
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filesystem value:" + FILESYSTEM_NAME));
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.length value:" + std::to_string(TEST_DATA.size())));
+  REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.primaryUri value:" + mock_data_lake_storage_client_ptr_->PRIMARY_URI));
+  auto passed_params = mock_data_lake_storage_client_ptr_->getPassedParams();
+  REQUIRE(passed_params.connection_string == CONNECTION_STRING);
+  REQUIRE(passed_params.file_system_name == FILESYSTEM_NAME);
+  REQUIRE(passed_params.directory_name == DIRECTORY_NAME);
+  REQUIRE(passed_params.filename == GETFILE_FILE_NAME);
+  REQUIRE_FALSE(passed_params.replace_file);
+}
+// A client error during file creation sends the flow file content to the failure directory.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "File creation fails", "[azureDataLakeStorageUpload]") {
+  mock_data_lake_storage_client_ptr_->setFileCreationError(true);
+  test_controller_.runSession(plan_, true);
+  auto failed_flowfiles = getFailedFlowFileContents();
+  REQUIRE(failed_flowfiles.size() == 1);
+  REQUIRE(failed_flowfiles[0] == TEST_DATA);
+}
+// A failed upload sends the flow file content to the failure directory.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "File upload fails", "[azureDataLakeStorageUpload]") {
+  mock_data_lake_storage_client_ptr_->setUploadFailure(true);
+  test_controller_.runSession(plan_, true);
+  auto failed_flowfiles = getFailedFlowFileContents();
+  REQUIRE(failed_flowfiles.size() == 1);
+  REQUIRE(failed_flowfiles[0] == TEST_DATA);
+}
+// The mock reports the file as not newly created; the flow file content is expected in the failure directory.
+TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to failure in case of 'fail' resolution strategy", "[azureDataLakeStorageUpload]") {
+  mock_data_lake_storage_client_ptr_->setFileCreation(false);
+  test_controller_.runSession(plan_, true);
+  auto failed_flowfiles = getFailedFlowFileContents();
+  REQUIRE(failed_flowfiles.size() == 1);
+  REQUIRE(failed_flowfiles[0] == TEST_DATA);
+}
From 3c4292252aa11a779fc0cfcb36c3b51e9fda9506 Mon Sep 17 00:00:00 2001
From: Gabor Gyimesi
Date: Tue, 3 Aug 2021 16:27:54 +0200
Subject: [PATCH 05/36] Handle error cases

---
 .../processors/PutAzureDataLakeStorage.cpp | 14 +++++-
 .../processors/PutAzureDataLakeStorage.h | 23 ++++++++--
 .../azure/storage/AzureDataLakeStorage.cpp | 21 +++++----
 .../azure/storage/AzureDataLakeStorage.h | 7 ++-
 .../storage/AzureDataLakeStorageClient.cpp | 38 ++++++---------
 .../storage/AzureDataLakeStorageClient.h | 4 +-
 .../azure/storage/DataLakeStorageClient.h | 5 +-
 .../PutAzureDataLakeStorageTests.cpp | 46 +++++++++++++++++--
 8 files changed, 105 insertions(+), 53 deletions(-)

diff --git
a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 3fe2c52a86..a6edcc7340 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -21,6 +21,7 @@ #include "PutAzureDataLakeStorage.h" #include "utils/ProcessorConfigUtils.h" +#include "utils/gsl.h" #include "controllerservices/AzureStorageCredentialsService.h" namespace org { @@ -144,8 +145,19 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrgetSize(), azure_data_lake_storage_, params); + PutAzureDataLakeStorage::ReadCallback callback(flow_file->getSize(), azure_data_lake_storage_, params, logger_); session->read(flow_file, &callback); + if (callback.caughtFileAlreadyExistsError()) { + gsl_Expects(conflict_resolution_strategy_ != "replace"); + if (conflict_resolution_strategy_ == "fail") { + session->transfer(flow_file, Failure); + return; + } else if (conflict_resolution_strategy_ == "ignore") { + session->transfer(flow_file, Success); + return; + } + } + auto result = callback.getResult(); if (result == std::nullopt) { logger_->log_error("Failed to upload file '%s' to Azura Data Lake storage", params.filename); diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index 9b34b9e10d..80ddf0e668 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -75,20 +75,29 @@ class PutAzureDataLakeStorage final : public core::Processor { class ReadCallback : public InputStreamCallback { public: - ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params) + ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger) : 
flow_size_(flow_size) , azure_data_lake_storage_(azure_data_lake_storage) - , params_(params) { + , params_(params) + , logger_(std::move(logger)) { } int64_t process(const std::shared_ptr& stream) override { std::vector buffer; int read_ret = stream->read(buffer, flow_size_); - if (read_ret < 0) { + if (io::isError(read_ret)) { return -1; } - result_ = azure_data_lake_storage_.uploadFile(params_, buffer.data(), flow_size_); + try { + result_ = azure_data_lake_storage_.uploadFile(params_, buffer.data(), flow_size_); + } catch(const storage::AzureDataLakeStorage::FileAlreadyExistsException&) { + caught_file_already_exists_error_ = true; + } catch(const std::runtime_error& err) { + logger_->log_error("A runtime error occurred while uploading file to Azure Data Lake storage: %s", err.what()); + return read_ret; + } + return read_ret; } @@ -96,11 +105,17 @@ class PutAzureDataLakeStorage final : public core::Processor { return result_; } + bool caughtFileAlreadyExistsError() const { + return caught_file_already_exists_error_; + } + private: uint64_t flow_size_; storage::AzureDataLakeStorage& azure_data_lake_storage_; const storage::PutAzureDataLakeStorageParameters& params_; + bool caught_file_already_exists_error_ = false; std::optional result_ = std::nullopt; + std::shared_ptr logger_; }; core::annotation::Input getInputRequirement() const override { diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index 44da7b8412..acbefff0c0 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -27,25 +27,26 @@ namespace minifi { namespace azure { namespace storage { -std::optional AzureDataLakeStorage::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { +AzureDataLakeStorage::AzureDataLakeStorage() + : data_lake_storage_client_(std::make_unique()) { +} + 
+AzureDataLakeStorage::AzureDataLakeStorage(std::unique_ptr data_lake_storage_client) + : data_lake_storage_client_(std::move(data_lake_storage_client)) { +} + +UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { auto file_created = data_lake_storage_client_->createFile(params); - if (!file_created) { - return std::nullopt; - } - if (!file_created.value() && !params.replace_file) { + if (!file_created && !params.replace_file) { std::string message = "File " + params.filename + " already exists on Azure Data Lake Storage"; logger_->log_error(message.c_str()); throw FileAlreadyExistsException("File " + params.filename + " already exists on Azure Data Lake Storage"); } auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); - if (!upload_url) { - return std::nullopt; - } - UploadDataLakeStorageResult result; result.length = buffer_size; - result.primary_uri = upload_url.value(); + result.primary_uri = upload_url; return result; } diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index 5b1b901450..14e4522689 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -48,11 +48,10 @@ class AzureDataLakeStorage { explicit FileAlreadyExistsException(const std::string& msg) : std::runtime_error(msg) {} }; - AzureDataLakeStorage() : data_lake_storage_client_(std::make_unique()) {} - explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client) : data_lake_storage_client_(std::move(data_lake_storage_client)) { - } + AzureDataLakeStorage(); + explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client); - std::optional uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); + azure::storage::UploadDataLakeStorageResult uploadFile(const 
storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); private: std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index 4b702f7cb2..585daf9f4e 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -36,32 +36,22 @@ void AzureDataLakeStorageClient::resetClientIfNeeded(const std::string& connecti } } -std::optional AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParameters& params) { - try { - resetClientIfNeeded(params.connection_string, params.file_system_name); - auto directory_client = client_->GetDirectoryClient(params.directory_name); - directory_client.CreateIfNotExists(); - auto file_client = directory_client.GetFileClient(params.filename); - auto response = file_client.CreateIfNotExists(); - return response.Value.Created; - } catch (const std::runtime_error& err) { - logger_->log_error("A runtime error occurred while creating file in Data Lake storage: %s", err.what()); - return std::nullopt; - } +bool AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParameters& params) { + resetClientIfNeeded(params.connection_string, params.file_system_name); + auto directory_client = client_->GetDirectoryClient(params.directory_name); + directory_client.CreateIfNotExists(); + auto file_client = directory_client.GetFileClient(params.filename); + auto response = file_client.CreateIfNotExists(); + return response.Value.Created; } -std::optional AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { - try { - resetClientIfNeeded(params.connection_string, params.file_system_name); - auto directory_client = client_->GetDirectoryClient(params.directory_name); - directory_client.CreateIfNotExists(); - 
auto file_client = directory_client.GetFileClient(params.filename); - file_client.UploadFrom(buffer, buffer_size); - return file_client.GetUrl(); - } catch (const std::runtime_error& err) { - logger_->log_error("A runtime error occurred while uploading blob: %s", err.what()); - return std::nullopt; - } +std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { + resetClientIfNeeded(params.connection_string, params.file_system_name); + auto directory_client = client_->GetDirectoryClient(params.directory_name); + directory_client.CreateIfNotExists(); + auto file_client = directory_client.GetFileClient(params.filename); + file_client.UploadFrom(buffer, buffer_size); + return file_client.GetUrl(); } } // namespace storage diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h index e65ca447a2..86a9335d3e 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.h +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -37,8 +37,8 @@ namespace storage { class AzureDataLakeStorageClient : public DataLakeStorageClient { public: - std::optional createFile(const PutAzureDataLakeStorageParameters& params) override; - std::optional uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override; + bool createFile(const PutAzureDataLakeStorageParameters& params) override; + std::string uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override; private: void resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name); diff --git a/extensions/azure/storage/DataLakeStorageClient.h b/extensions/azure/storage/DataLakeStorageClient.h index 01c4b50e95..8c7919ade2 100644 --- a/extensions/azure/storage/DataLakeStorageClient.h +++ 
b/extensions/azure/storage/DataLakeStorageClient.h @@ -19,7 +19,6 @@ */ #pragma once -#include #include namespace org { @@ -39,8 +38,8 @@ struct PutAzureDataLakeStorageParameters { class DataLakeStorageClient { public: - virtual std::optional createFile(const PutAzureDataLakeStorageParameters& params) = 0; - virtual std::optional uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) = 0; + virtual bool createFile(const PutAzureDataLakeStorageParameters& params) = 0; + virtual std::string uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) = 0; }; } // namespace storage diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 00cb8a5e5d..3606f4b856 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -41,15 +41,22 @@ class MockDataLakeStorageClient : public minifi::azure::storage::DataLakeStorage public: const std::string PRIMARY_URI = "test-uri"; - std::optional createFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& /*params*/) override { - return file_creation_error_ ? std::nullopt : std::make_optional(create_file_); + bool createFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& /*params*/) override { + if (file_creation_error_) { + throw std::runtime_error("error"); + } + return create_file_; } - std::optional uploadFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override { + std::string uploadFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override { input_data_ = std::string(buffer, buffer + buffer_size); params_ = params; - return upload_fails_ ? 
std::nullopt : std::make_optional(PRIMARY_URI); + if (upload_fails_) { + throw std::runtime_error("error"); + } + + return PRIMARY_URI; } void setFileCreation(bool create_file) { @@ -223,10 +230,39 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "File upload fails", "[azu REQUIRE(failed_flowfiles[0] == TEST_DATA); } -TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to failure in case of 'fail' resolution strategy", "[azureDataLakeStorageUpload]") { +TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to failure on 'fail' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); auto failed_flowfiles = getFailedFlowFileContents(); REQUIRE(failed_flowfiles.size() == 1); REQUIRE(failed_flowfiles[0] == TEST_DATA); } + +TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'ignore' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { + plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), "ignore"); + mock_data_lake_storage_client_ptr_->setFileCreation(false); + test_controller_.runSession(plan_, true); + REQUIRE(getFailedFlowFileContents().size() == 0); + using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime; + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:filename value:" + GETFILE_FILE_NAME)); + REQUIRE(!verifyLogLinePresenceInPollTime(0s, "key:azure")); +} + +TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'replace' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { + plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), "replace"); + mock_data_lake_storage_client_ptr_->setFileCreation(false); + test_controller_.runSession(plan_, true); + 
REQUIRE(getFailedFlowFileContents().size() == 0); + using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime; + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.directory value:" + DIRECTORY_NAME)); + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filename value:" + GETFILE_FILE_NAME)); + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filesystem value:" + FILESYSTEM_NAME)); + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.length value:" + std::to_string(TEST_DATA.size()))); + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.primaryUri value:" + mock_data_lake_storage_client_ptr_->PRIMARY_URI)); + auto passed_params = mock_data_lake_storage_client_ptr_->getPassedParams(); + REQUIRE(passed_params.connection_string == CONNECTION_STRING); + REQUIRE(passed_params.file_system_name == FILESYSTEM_NAME); + REQUIRE(passed_params.directory_name == DIRECTORY_NAME); + REQUIRE(passed_params.filename == GETFILE_FILE_NAME); + REQUIRE(passed_params.replace_file); +} From 67a01998f03f614e932043b7b066cb50298756d0 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 4 Aug 2021 17:30:44 +0200 Subject: [PATCH 06/36] Add PutAzureDataLakeStorage documentation --- PROCESSORS.md | 26 +++++++++++++++++++ .../processors/PutAzureDataLakeStorage.cpp | 3 +-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/PROCESSORS.md b/PROCESSORS.md index 4127414b4e..a7782d4105 100644 --- a/PROCESSORS.md +++ b/PROCESSORS.md @@ -42,6 +42,7 @@ - [PublishKafka](#publishkafka) - [PublishMQTT](#publishmqtt) - [PutAzureBlobStorage](#putazureblobstorage) +- [PutAzureDataLakeStorage](#putazuredatalakestorage) - [PutFile](#putfile) - [PutOPCProcessor](#putopcprocessor) - [PutS3Object](#puts3object) @@ -1238,6 +1239,31 @@ In the list below, the names of required properties appear in bold. 
Any other pr |success|All successfully processed FlowFiles are routed to this relationship| +## PutAzureDataLakeStorage + +### Description + +Puts content into an Azure Data Lake Storage Gen 2 +### Properties + +In the list below, the names of required properties appear in bold. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the NiFi Expression Language. + +| Name | Default Value | Allowable Values | Description | +| - | - | - | - | +|**Azure Storage Credentials Service**|||Name of the Azure Storage Credentials Service used to retrieve the connection string from.| +|**Filesystem Name**|||Name of the Azure Storage File System. It is assumed to be already existing.
**Supports Expression Language: true**| +|**Directory Name**|||Name of the Azure Storage Directory. The Directory Name cannot contain a leading '/'. The root directory can be designated by the empty string value. In case of the PutAzureDataLakeStorage processor, the directory will be created if not already existing.
**Supports Expression Language: true**| +|File Name|||The filename to be uploaded. If left empty the filename attribute will be used by default.| +|**Conflict Resolution Strategy**|fail|fail
replace
ignore|Indicates what should happen when a file with the same name already exists in the output directory.| + +### Relationships + +| Name | Description | +| - | - | +|failure|Files that could not be written to Azure storage for some reason are transferred to this relationship| +|success|Files that have been successfully written to Azure storage are transferred to this relationship| + + ## PutFile ### Description diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index a6edcc7340..4e6c66045d 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -58,8 +58,7 @@ const core::Property PutAzureDataLakeStorage::FileName( ->build()); const core::Property PutAzureDataLakeStorage::ConflictResolutionStrategy( core::PropertyBuilder::createProperty("Conflict Resolution Strategy") - ->withDescription("Indicates what should happen when a file with the same name already exists in the output directory") - ->supportsExpressionLanguage(true) + ->withDescription("Indicates what should happen when a file with the same name already exists in the output directory.") ->isRequired(true) ->withDefaultValue("fail") ->withAllowableValues(CONFLICT_RESOLUTION_STRATEGIES) From 1ee09e0ab06a4b9665debb199bfb0b031c3b1f51 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 4 Aug 2021 18:45:42 +0200 Subject: [PATCH 07/36] Use smart enum for conflict resolution strategy and fix rebase issues --- .../azure/processors/PutAzureDataLakeStorage.cpp | 15 ++++++++------- .../azure/processors/PutAzureDataLakeStorage.h | 11 +++++++++-- .../azure-tests/PutAzureDataLakeStorageTests.cpp | 10 ++++++---- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 4e6c66045d..833c504aa4 100644 --- 
a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -60,8 +60,8 @@ const core::Property PutAzureDataLakeStorage::ConflictResolutionStrategy( core::PropertyBuilder::createProperty("Conflict Resolution Strategy") ->withDescription("Indicates what should happen when a file with the same name already exists in the output directory.") ->isRequired(true) - ->withDefaultValue("fail") - ->withAllowableValues(CONFLICT_RESOLUTION_STRATEGIES) + ->withDefaultValue(toString(FileExistsResolutionStrategy::FAIL)) + ->withAllowableValues(FileExistsResolutionStrategy::values()) ->build()); const core::Relationship PutAzureDataLakeStorage::Success("success", "Files that have been successfully written to Azure storage are transferred to this relationship"); @@ -110,7 +110,8 @@ void PutAzureDataLakeStorage::onSchedule(const std::shared_ptr& context, const std::shared_ptr& session) { @@ -123,7 +124,7 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrgetProperty(FilesystemName, params.file_system_name, flow_file) || params.file_system_name.empty()) { logger_->log_error("Filesystem Name '%s' is invalid or empty!", params.file_system_name); @@ -147,11 +148,11 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrgetSize(), azure_data_lake_storage_, params, logger_); session->read(flow_file, &callback); if (callback.caughtFileAlreadyExistsError()) { - gsl_Expects(conflict_resolution_strategy_ != "replace"); - if (conflict_resolution_strategy_ == "fail") { + gsl_Expects(conflict_resolution_strategy_ != FileExistsResolutionStrategy::REPLACE); + if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::FAIL) { session->transfer(flow_file, Failure); return; - } else if (conflict_resolution_strategy_ == "ignore") { + } else if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::IGNORE) { session->transfer(flow_file, Success); return; } diff --git 
a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index 80ddf0e668..68b07cc5bc 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -33,6 +33,7 @@ #include "core/logging/LoggerConfiguration.h" #include "storage/AzureDataLakeStorage.h" #include "storage/AzureDataLakeStorageClient.h" +#include "utils/Enum.h" class PutAzureDataLakeStorageTestsFixture; @@ -60,6 +61,12 @@ class PutAzureDataLakeStorage final : public core::Processor { static const core::Relationship Failure; static const core::Relationship Success; + SMART_ENUM(FileExistsResolutionStrategy, + (FAIL, "fail"), + (REPLACE, "replace"), + (IGNORE, "ignore") + ) + explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid = minifi::utils::Identifier()) : core::Processor(name, uuid) { } @@ -101,7 +108,7 @@ class PutAzureDataLakeStorage final : public core::Processor { return read_ret; } - utils::optional getResult() const { + std::optional getResult() const { return result_; } @@ -131,7 +138,7 @@ class PutAzureDataLakeStorage final : public core::Processor { std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; std::string connection_string_; - std::string conflict_resolution_strategy_; + FileExistsResolutionStrategy conflict_resolution_strategy_; storage::AzureDataLakeStorage azure_data_lake_storage_; }; diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 3606f4b856..de1762c06f 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -101,7 +101,7 @@ class PutAzureDataLakeStorageTestsFixture { mock_data_lake_storage_client_ptr_ = mock_data_lake_storage_client.get(); put_azure_data_lake_storage_ = std::shared_ptr( new 
minifi::azure::processors::PutAzureDataLakeStorage("PutAzureDataLakeStorage", utils::Identifier(), std::move(mock_data_lake_storage_client))); - auto input_dir = test_controller_.createTempDirectory("/tmp/gt.XXXXXX").str(); + auto input_dir = test_controller_.createTempDirectory(); std::ofstream input_file_stream(input_dir + utils::file::FileUtils::get_separator() + GETFILE_FILE_NAME); input_file_stream << TEST_DATA; input_file_stream.close(); @@ -119,7 +119,7 @@ class PutAzureDataLakeStorageTestsFixture { plan_->addConnection(put_azure_data_lake_storage_, {"failure", "d"}, putfile_); putfile_->setAutoTerminatedRelationships({{"success", "d"}}); putfile_->setAutoTerminatedRelationships({{"failure", "d"}}); - output_dir_ = test_controller_.createTempDirectory("/tmp/gt.XXXXXX").str(); + output_dir_ = test_controller_.createTempDirectory(); plan_->setProperty(putfile_, org::apache::nifi::minifi::processors::PutFile::Directory.getName(), output_dir_); azure_storage_cred_service_ = plan_->addController("AzureStorageCredentialsService", "AzureStorageCredentialsService"); @@ -239,7 +239,8 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to failure on 'f } TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'ignore' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { - plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), "ignore"); + plan_->setProperty(put_azure_data_lake_storage_, + minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::IGNORE)); mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); REQUIRE(getFailedFlowFileContents().size() == 0); @@ -249,7 +250,8 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'i } 
TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'replace' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { - plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), "replace"); + plan_->setProperty(put_azure_data_lake_storage_, + minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::REPLACE)); mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); REQUIRE(getFailedFlowFileContents().size() == 0); From 18d4a16dd3910511e3bd167c489057fe8712f316 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 09:51:15 +0200 Subject: [PATCH 08/36] Minor fixes and refactoring --- PROCESSORS.md | 4 +- .../processors/PutAzureDataLakeStorage.cpp | 82 +++++++++++++------ .../processors/PutAzureDataLakeStorage.h | 30 +------ .../azure/storage/AzureDataLakeStorage.cpp | 4 +- .../azure/storage/AzureDataLakeStorage.h | 3 +- .../storage/AzureDataLakeStorageClient.cpp | 17 ++-- .../storage/AzureDataLakeStorageClient.h | 1 + .../PutAzureDataLakeStorageTests.cpp | 20 ++--- 8 files changed, 86 insertions(+), 75 deletions(-) diff --git a/PROCESSORS.md b/PROCESSORS.md index a7782d4105..47436d0e0c 100644 --- a/PROCESSORS.md +++ b/PROCESSORS.md @@ -1252,8 +1252,8 @@ In the list below, the names of required properties appear in bold. Any other pr | - | - | - | - | |**Azure Storage Credentials Service**|||Name of the Azure Storage Credentials Service used to retrieve the connection string from.| |**Filesystem Name**|||Name of the Azure Storage File System. It is assumed to be already existing.
**Supports Expression Language: true**| -|**Directory Name**|||Name of the Azure Storage Directory. The Directory Name cannot contain a leading '/'. The root directory can be designated by the empty string value. In case of the PutAzureDataLakeStorage processor, the directory will be created if not already existing.
**Supports Expression Language: true**| -|File Name|||The filename to be uploaded. If left empty the filename attribute will be used by default.| +|Directory Name|||Name of the Azure Storage Directory. The Directory Name cannot contain a leading '/'. If left empty it designates the root directory. The directory will be created if not already existing.
**Supports Expression Language: true**| +|File Name|||The filename to be uploaded. If left empty the filename attribute will be used by default.
**Supports Expression Language: true**| |**Conflict Resolution Strategy**|fail|fail
replace
ignore|Indicates what should happen when a file with the same name already exists in the output directory.| ### Relationships diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 833c504aa4..429991a5b9 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -31,8 +31,6 @@ namespace minifi { namespace azure { namespace processors { -const std::set PutAzureDataLakeStorage::CONFLICT_RESOLUTION_STRATEGIES({"fail", "replace", "ignore"}); - const core::Property PutAzureDataLakeStorage::AzureStorageCredentialsService( core::PropertyBuilder::createProperty("Azure Storage Credentials Service") ->withDescription("Name of the Azure Storage Credentials Service used to retrieve the connection string from.") @@ -47,9 +45,8 @@ const core::Property PutAzureDataLakeStorage::FilesystemName( const core::Property PutAzureDataLakeStorage::DirectoryName( core::PropertyBuilder::createProperty("Directory Name") ->withDescription("Name of the Azure Storage Directory. The Directory Name cannot contain a leading '/'. " - "The root directory can be designated by the empty string value. In case of the PutAzureDataLakeStorage processor, the directory will be created if not already existing.") + "If left empty it designates the root directory. 
The directory will be created if not already existing.") ->supportsExpressionLanguage(true) - ->isRequired(true) ->build()); const core::Property PutAzureDataLakeStorage::FileName( core::PropertyBuilder::createProperty("File Name") @@ -114,45 +111,52 @@ void PutAzureDataLakeStorage::onSchedule(const std::shared_ptr& context, const std::shared_ptr& session) { - logger_->log_debug("PutAzureDataLakeStorage onTrigger"); - std::shared_ptr flow_file = session->get(); - if (!flow_file) { - context->yield(); - return; - } - +std::optional PutAzureDataLakeStorage::buildUploadParameters( + const std::shared_ptr& context, const std::shared_ptr& flow_file) { storage::PutAzureDataLakeStorageParameters params; params.connection_string = connection_string_; params.replace_file = conflict_resolution_strategy_ == FileExistsResolutionStrategy::REPLACE; if (!context->getProperty(FilesystemName, params.file_system_name, flow_file) || params.file_system_name.empty()) { logger_->log_error("Filesystem Name '%s' is invalid or empty!", params.file_system_name); - session->transfer(flow_file, Failure); - return; + return std::nullopt; } - if (!context->getProperty(DirectoryName, params.directory_name, flow_file) || params.directory_name.empty()) { - logger_->log_error("Directory Name '%s' is invalid or empty!", params.directory_name); - session->transfer(flow_file, Failure); - return; - } + context->getProperty(DirectoryName, params.directory_name, flow_file); context->getProperty(FileName, params.filename, flow_file); if (params.filename.empty() && (!flow_file->getAttribute("filename", params.filename) || params.filename.empty())) { logger_->log_error("No File Name is set and default object key 'filename' attribute could not be found!"); + return std::nullopt; + } + + return params; +} + +void PutAzureDataLakeStorage::onTrigger(const std::shared_ptr& context, const std::shared_ptr& session) { + logger_->log_debug("PutAzureDataLakeStorage onTrigger"); + std::shared_ptr flow_file = 
session->get(); + if (!flow_file) { + context->yield(); + return; + } + + const auto params = buildUploadParameters(context, flow_file); + if (!params) { session->transfer(flow_file, Failure); return; } - PutAzureDataLakeStorage::ReadCallback callback(flow_file->getSize(), azure_data_lake_storage_, params, logger_); + PutAzureDataLakeStorage::ReadCallback callback(flow_file->getSize(), azure_data_lake_storage_, *params, logger_); session->read(flow_file, &callback); if (callback.caughtFileAlreadyExistsError()) { gsl_Expects(conflict_resolution_strategy_ != FileExistsResolutionStrategy::REPLACE); if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::FAIL) { + logger_->log_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage because file already exists", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Failure); return; } else if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::IGNORE) { + logger_->log_debug("Upload of file '%s/%s' was ignored because it already exits in filesystem '%s' on Azure Data Lake Storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Success); return; } @@ -160,19 +164,47 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrlog_error("Failed to upload file '%s' to Azura Data Lake storage", params.filename); + logger_->log_error("Failed to upload file '%s' to Azure Data Lake storage", params->filename); session->transfer(flow_file, Failure); } else { - session->putAttribute(flow_file, "azure.filesystem", params.file_system_name); - session->putAttribute(flow_file, "azure.directory", params.directory_name); - session->putAttribute(flow_file, "azure.filename", params.filename); + session->putAttribute(flow_file, "azure.filesystem", params->file_system_name); + session->putAttribute(flow_file, "azure.directory", params->directory_name); + session->putAttribute(flow_file, 
"azure.filename", params->filename); session->putAttribute(flow_file, "azure.primaryUri", result->primary_uri); session->putAttribute(flow_file, "azure.length", std::to_string(result->length)); - logger_->log_debug("Successfully uploaded file '%s' to Azura Data Lake storage", params.filename); + logger_->log_debug("Successfully uploaded file '%s' to filesystem '%s' on Azure Data Lake storage", params->filename, params->file_system_name); session->transfer(flow_file, Success); } } +PutAzureDataLakeStorage::ReadCallback::ReadCallback( + uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger) + : flow_size_(flow_size) + , azure_data_lake_storage_(azure_data_lake_storage) + , params_(params) + , logger_(std::move(logger)) { +} + +int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptr& stream) { + std::vector buffer; + int read_ret = stream->read(buffer, flow_size_); + if (io::isError(read_ret)) { + return -1; + } + + try { + result_ = azure_data_lake_storage_.uploadFile(params_, buffer.data(), flow_size_); + } catch(const storage::AzureDataLakeStorage::FileAlreadyExistsException& ex) { + logger_->log_warn(ex.what()); + caught_file_already_exists_error_ = true; + } catch(const std::runtime_error& err) { + logger_->log_error("A runtime error occurred while uploading file to Azure Data Lake storage: %s", err.what()); + return read_ret; + } + + return read_ret; +} + } // namespace processors } // namespace azure } // namespace minifi diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index 68b07cc5bc..e3f0ec8355 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -46,8 +46,6 @@ namespace processors { class PutAzureDataLakeStorage final : public core::Processor { public: - static const std::set 
CONFLICT_RESOLUTION_STRATEGIES; - static constexpr char const* ProcessorName = "PutAzureDataLakeStorage"; // Supported Properties @@ -82,31 +80,8 @@ class PutAzureDataLakeStorage final : public core::Processor { class ReadCallback : public InputStreamCallback { public: - ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger) - : flow_size_(flow_size) - , azure_data_lake_storage_(azure_data_lake_storage) - , params_(params) - , logger_(std::move(logger)) { - } - - int64_t process(const std::shared_ptr& stream) override { - std::vector buffer; - int read_ret = stream->read(buffer, flow_size_); - if (io::isError(read_ret)) { - return -1; - } - - try { - result_ = azure_data_lake_storage_.uploadFile(params_, buffer.data(), flow_size_); - } catch(const storage::AzureDataLakeStorage::FileAlreadyExistsException&) { - caught_file_already_exists_error_ = true; - } catch(const std::runtime_error& err) { - logger_->log_error("A runtime error occurred while uploading file to Azure Data Lake storage: %s", err.what()); - return read_ret; - } - - return read_ret; - } + ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger); + int64_t process(const std::shared_ptr& stream) override; std::optional getResult() const { return result_; @@ -135,6 +110,7 @@ class PutAzureDataLakeStorage final : public core::Processor { } std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; + std::optional buildUploadParameters(const std::shared_ptr& context, const std::shared_ptr& flow_file); std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; std::string connection_string_; diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index acbefff0c0..5d0ed5fcf5 100644 --- 
a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -38,9 +38,7 @@ AzureDataLakeStorage::AzureDataLakeStorage(std::unique_ptrcreateFile(params); if (!file_created && !params.replace_file) { - std::string message = "File " + params.filename + " already exists on Azure Data Lake Storage"; - logger_->log_error(message.c_str()); - throw FileAlreadyExistsException("File " + params.filename + " already exists on Azure Data Lake Storage"); + throw FileAlreadyExistsException(params); } auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index 14e4522689..fbe1e41c03 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -45,7 +45,8 @@ class AzureDataLakeStorage { public: class FileAlreadyExistsException : public std::runtime_error { public: - explicit FileAlreadyExistsException(const std::string& msg) : std::runtime_error(msg) {} + explicit FileAlreadyExistsException(const PutAzureDataLakeStorageParameters& params) + : std::runtime_error("File '" + params.directory_name + "/" + params.filename + "' already exists on Azure Data Lake Storage filesystem '" + params.file_system_name + "'") {} }; AzureDataLakeStorage(); diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index 585daf9f4e..97b738a511 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -36,20 +36,23 @@ void AzureDataLakeStorageClient::resetClientIfNeeded(const std::string& connecti } } -bool AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParameters& params) { +Azure::Storage::Files::DataLake::DataLakeFileClient AzureDataLakeStorageClient::getFileClient(const 
PutAzureDataLakeStorageParameters& params) { resetClientIfNeeded(params.connection_string, params.file_system_name); auto directory_client = client_->GetDirectoryClient(params.directory_name); - directory_client.CreateIfNotExists(); - auto file_client = directory_client.GetFileClient(params.filename); + if (!params.directory_name.empty()) { + directory_client.CreateIfNotExists(); + } + return directory_client.GetFileClient(params.filename); +} + +bool AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParameters& params) { + auto file_client = getFileClient(params); auto response = file_client.CreateIfNotExists(); return response.Value.Created; } std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { - resetClientIfNeeded(params.connection_string, params.file_system_name); - auto directory_client = client_->GetDirectoryClient(params.directory_name); - directory_client.CreateIfNotExists(); - auto file_client = directory_client.GetFileClient(params.filename); + auto file_client = getFileClient(params); file_client.UploadFrom(buffer, buffer_size); return file_client.GetUrl(); } diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h index 86a9335d3e..306e2b6881 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.h +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -42,6 +42,7 @@ class AzureDataLakeStorageClient : public DataLakeStorageClient { private: void resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name); + Azure::Storage::Files::DataLake::DataLakeFileClient getFileClient(const PutAzureDataLakeStorageParameters& params); std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; std::string connection_string_; diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp 
b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index de1762c06f..cb2cf3326d 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -181,16 +181,6 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Filesystem name is not se REQUIRE(failed_flowfiles[0] == TEST_DATA); } -TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Directory name is not set", "[azureDataLakeStorageParameters]") { - plan_->setProperty(update_attribute_, "test.directoryname", "", true); - test_controller_.runSession(plan_, true); - using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime; - REQUIRE(verifyLogLinePresenceInPollTime(1s, "Directory Name '' is invalid or empty!")); - auto failed_flowfiles = getFailedFlowFileContents(); - REQUIRE(failed_flowfiles.size() == 1); - REQUIRE(failed_flowfiles[0] == TEST_DATA); -} - TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Connection String is empty", "[azureDataLakeStorageParameters]") { plan_->setProperty(azure_storage_cred_service_, minifi::azure::controllers::AzureStorageCredentialsService::ConnectionString.getName(), ""); REQUIRE_THROWS_AS(test_controller_.runSession(plan_, true), minifi::Exception); @@ -268,3 +258,13 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'repl REQUIRE(passed_params.filename == GETFILE_FILE_NAME); REQUIRE(passed_params.replace_file); } + +TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Upload to Azure Data Lake Storage with empty directory is accepted", "[azureDataLakeStorageUpload]") { + plan_->setProperty(put_azure_data_lake_storage_, minifi::azure::processors::PutAzureDataLakeStorage::DirectoryName.getName(), ""); + test_controller_.runSession(plan_, true); + REQUIRE(getFailedFlowFileContents().size() == 0); + using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime; + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.directory 
value:\n")); + auto passed_params = mock_data_lake_storage_client_ptr_->getPassedParams(); + REQUIRE(passed_params.directory_name == ""); +} From 5684a86bb42937424850859f9ea4128c88e03028 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 12:35:24 +0200 Subject: [PATCH 09/36] Add missing virtual destructor --- extensions/azure/storage/DataLakeStorageClient.h | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/azure/storage/DataLakeStorageClient.h b/extensions/azure/storage/DataLakeStorageClient.h index 8c7919ade2..ab3da9bd6d 100644 --- a/extensions/azure/storage/DataLakeStorageClient.h +++ b/extensions/azure/storage/DataLakeStorageClient.h @@ -40,6 +40,7 @@ class DataLakeStorageClient { public: virtual bool createFile(const PutAzureDataLakeStorageParameters& params) = 0; virtual std::string uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) = 0; + virtual ~DataLakeStorageClient() {} }; } // namespace storage From 7af73efec7d6e669dbd4544d70a9f5c0ed530147 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 13:31:52 +0200 Subject: [PATCH 10/36] Add curl dependency to Azure core lib --- cmake/BundledAzureSdkCpp.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/BundledAzureSdkCpp.cmake b/cmake/BundledAzureSdkCpp.cmake index ec169c606b..d3a31999a6 100644 --- a/cmake/BundledAzureSdkCpp.cmake +++ b/cmake/BundledAzureSdkCpp.cmake @@ -65,7 +65,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) ) # Set dependencies - add_dependencies(azure-sdk-cpp-external-build LibXml2::LibXml2 OpenSSL::Crypto OpenSSL::SSL) + add_dependencies(azure-sdk-cpp-external-build CURL::libcurl LibXml2::LibXml2 OpenSSL::Crypto OpenSSL::SSL) # Set variables set(LIBAZURE_FOUND "YES" CACHE STRING "" FORCE) @@ -87,7 +87,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) set_target_properties(AZURE::azure-core PROPERTIES IMPORTED_LOCATION "${AZURE_CORE_LIB}") 
add_dependencies(AZURE::azure-core azure-sdk-cpp-external-build) target_include_directories(AZURE::azure-core INTERFACE ${LIBAZURE_INCLUDE_DIRS}) - target_link_libraries(AZURE::azure-core INTERFACE OpenSSL::Crypto OpenSSL::SSL) + target_link_libraries(AZURE::azure-core INTERFACE CURL::libcurl OpenSSL::Crypto OpenSSL::SSL) if (WIN32) target_link_libraries(AZURE::azure-core INTERFACE winhttp.lib) endif() From 2f561278f3b9c1f7d7087ed3ee5b7c7c06ba4621 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 13:57:15 +0200 Subject: [PATCH 11/36] Fix linter issues --- extensions/azure/processors/PutAzureDataLakeStorage.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 429991a5b9..083a8ac9ce 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -152,11 +152,13 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrlog_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage because file already exists", params->directory_name, params->filename, params->file_system_name); + logger_->log_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage because file already exists", + params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Failure); return; } else if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::IGNORE) { - logger_->log_debug("Upload of file '%s/%s' was ignored because it already exits in filesystem '%s' on Azure Data Lake Storage", params->directory_name, params->filename, params->file_system_name); + logger_->log_debug("Upload of file '%s/%s' was ignored because it already exits in filesystem '%s' on Azure Data Lake Storage", + params->directory_name, params->filename, params->file_system_name); 
session->transfer(flow_file, Success); return; } From a167c011ccd54f132b594edfeafc92d1a41fe42d Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 15:55:46 +0200 Subject: [PATCH 12/36] Remove query string on the result Azure URL --- .../azure/storage/AzureDataLakeStorageClient.cpp | 6 +++++- .../azure/storage/AzureDataLakeStorageClient.h | 13 +++++++++++++ .../azure-tests/PutAzureDataLakeStorageTests.cpp | 5 +++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index 97b738a511..9f5dbdd952 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -54,7 +54,11 @@ bool AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParamet std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { auto file_client = getFileClient(params); file_client.UploadFrom(buffer, buffer_size); - return file_client.GetUrl(); + auto full_url = file_client.GetUrl(); + if (auto query_string_pos = full_url.find('?'); query_string_pos != std::string::npos) { + return full_url.substr(0, query_string_pos); + } + return full_url; } } // namespace storage diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h index 306e2b6881..49faa21741 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.h +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -37,7 +37,20 @@ namespace storage { class AzureDataLakeStorageClient : public DataLakeStorageClient { public: + /** + * Creates a file on Azure Data Lake Storage + * @param params Parameters required for connecting and file creation on Azure + * @return True if a new file was created, false otherwise + */ bool createFile(const 
PutAzureDataLakeStorageParameters& params) override; + + /** + * Creates a file on the Azure Data Lake Storage + * @param params Parameters required for connecting and file access on Azure + * @param buffer Buffer containing the data to be uploaded + * @param buffer_size Size of the data to be uploaded + * @return URI of the file uploaded + */ std::string uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override; private: diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index cb2cf3326d..23613f0648 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -39,7 +39,7 @@ const std::string GETFILE_FILE_NAME = "input_data.log"; class MockDataLakeStorageClient : public minifi::azure::storage::DataLakeStorageClient { public: - const std::string PRIMARY_URI = "test-uri"; + const std::string PRIMARY_URI = "http://test-uri/file"; bool createFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& /*params*/) override { if (file_creation_error_) { @@ -56,7 +56,7 @@ class MockDataLakeStorageClient : public minifi::azure::storage::DataLakeStorage throw std::runtime_error("error"); } - return PRIMARY_URI; + return RETURNED_PRIMARY_URI; } void setFileCreation(bool create_file) { @@ -76,6 +76,7 @@ class MockDataLakeStorageClient : public minifi::azure::storage::DataLakeStorage } private: + const std::string RETURNED_PRIMARY_URI = "http://test-uri/file?secret-sas"; bool create_file_ = true; bool file_creation_error_ = false; bool upload_fails_ = false; From 14dba5e894f32099121b6da9db64846d9f9957d5 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 17:16:28 +0200 Subject: [PATCH 13/36] Fix naming conflict on Windows --- .../azure/processors/PutAzureDataLakeStorage.cpp | 10 +++++----- 
extensions/azure/processors/PutAzureDataLakeStorage.h | 6 +++--- .../test/azure-tests/PutAzureDataLakeStorageTests.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 083a8ac9ce..cf9ef77231 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -57,7 +57,7 @@ const core::Property PutAzureDataLakeStorage::ConflictResolutionStrategy( core::PropertyBuilder::createProperty("Conflict Resolution Strategy") ->withDescription("Indicates what should happen when a file with the same name already exists in the output directory.") ->isRequired(true) - ->withDefaultValue(toString(FileExistsResolutionStrategy::FAIL)) + ->withDefaultValue(toString(FileExistsResolutionStrategy::FAIL_FLOW)) ->withAllowableValues(FileExistsResolutionStrategy::values()) ->build()); @@ -115,7 +115,7 @@ std::optional PutAzureDataLakeStorag const std::shared_ptr& context, const std::shared_ptr& flow_file) { storage::PutAzureDataLakeStorageParameters params; params.connection_string = connection_string_; - params.replace_file = conflict_resolution_strategy_ == FileExistsResolutionStrategy::REPLACE; + params.replace_file = conflict_resolution_strategy_ == FileExistsResolutionStrategy::REPLACE_FILE; if (!context->getProperty(FilesystemName, params.file_system_name, flow_file) || params.file_system_name.empty()) { logger_->log_error("Filesystem Name '%s' is invalid or empty!", params.file_system_name); @@ -150,13 +150,13 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrgetSize(), azure_data_lake_storage_, *params, logger_); session->read(flow_file, &callback); if (callback.caughtFileAlreadyExistsError()) { - gsl_Expects(conflict_resolution_strategy_ != FileExistsResolutionStrategy::REPLACE); - if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::FAIL) { + 
gsl_Expects(conflict_resolution_strategy_ != FileExistsResolutionStrategy::REPLACE_FILE); + if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::FAIL_FLOW) { logger_->log_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage because file already exists", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Failure); return; - } else if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::IGNORE) { + } else if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::IGNORE_REQUEST) { logger_->log_debug("Upload of file '%s/%s' was ignored because it already exits in filesystem '%s' on Azure Data Lake Storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Success); diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index e3f0ec8355..a7ee833a57 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -60,9 +60,9 @@ class PutAzureDataLakeStorage final : public core::Processor { static const core::Relationship Success; SMART_ENUM(FileExistsResolutionStrategy, - (FAIL, "fail"), - (REPLACE, "replace"), - (IGNORE, "ignore") + (FAIL_FLOW, "fail"), + (REPLACE_FILE, "replace"), + (IGNORE_REQUEST, "ignore") ) explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid = minifi::utils::Identifier()) diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 23613f0648..9b78b2e0a0 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -231,7 +231,7 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to failure on 'f 
TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'ignore' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { plan_->setProperty(put_azure_data_lake_storage_, - minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::IGNORE)); + minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::IGNORE_REQUEST)); mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); REQUIRE(getFailedFlowFileContents().size() == 0); @@ -242,7 +242,7 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'i TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'replace' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { plan_->setProperty(put_azure_data_lake_storage_, - minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::REPLACE)); + minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::REPLACE_FILE)); mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); REQUIRE(getFailedFlowFileContents().size() == 0); From 07844f36931b32cd5c1f3f111b42b5b72e5b3b7b Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 6 Aug 2021 19:57:57 +0200 Subject: [PATCH 14/36] Fix linter issue --- libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp 
b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 9b78b2e0a0..8e9db74ccb 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -231,7 +231,8 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to failure on 'f TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'ignore' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { plan_->setProperty(put_azure_data_lake_storage_, - minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::IGNORE_REQUEST)); + minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), + toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::IGNORE_REQUEST)); mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); REQUIRE(getFailedFlowFileContents().size() == 0); @@ -242,7 +243,8 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'i TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'replace' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { plan_->setProperty(put_azure_data_lake_storage_, - minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::REPLACE_FILE)); + minifi::azure::processors::PutAzureDataLakeStorage::ConflictResolutionStrategy.getName(), + toString(minifi::azure::processors::PutAzureDataLakeStorage::FileExistsResolutionStrategy::REPLACE_FILE)); mock_data_lake_storage_client_ptr_->setFileCreation(false); test_controller_.runSession(plan_, true); REQUIRE(getFailedFlowFileContents().size() == 0); From 5a2df2db92235b055f9dffc6922164f82600b3a9 
Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Tue, 17 Aug 2021 10:53:23 +0200 Subject: [PATCH 15/36] Refactoring --- extensions/azure/processors/PutAzureDataLakeStorage.cpp | 6 +++--- extensions/azure/processors/PutAzureDataLakeStorage.h | 4 ---- extensions/azure/storage/AzureDataLakeStorageClient.h | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index cf9ef77231..55b17450eb 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -165,8 +165,8 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrlog_error("Failed to upload file '%s' to Azure Data Lake storage", params->filename); + if (!result) { + logger_->log_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Failure); } else { session->putAttribute(flow_file, "azure.filesystem", params->file_system_name); @@ -174,7 +174,7 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrputAttribute(flow_file, "azure.filename", params->filename); session->putAttribute(flow_file, "azure.primaryUri", result->primary_uri); session->putAttribute(flow_file, "azure.length", std::to_string(result->length)); - logger_->log_debug("Successfully uploaded file '%s' to filesystem '%s' on Azure Data Lake storage", params->filename, params->file_system_name); + logger_->log_debug("Successfully uploaded file '%s/%s' to filesystem '%s' on Azure Data Lake storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Success); } } diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index a7ee833a57..f8ffed3399 100644 --- 
a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -32,7 +31,6 @@ #include "core/logging/Logger.h" #include "core/logging/LoggerConfiguration.h" #include "storage/AzureDataLakeStorage.h" -#include "storage/AzureDataLakeStorageClient.h" #include "utils/Enum.h" class PutAzureDataLakeStorageTestsFixture; @@ -46,8 +44,6 @@ namespace processors { class PutAzureDataLakeStorage final : public core::Processor { public: - static constexpr char const* ProcessorName = "PutAzureDataLakeStorage"; - // Supported Properties static const core::Property AzureStorageCredentialsService; static const core::Property FilesystemName; diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h index 49faa21741..27840a76e6 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.h +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -57,7 +57,6 @@ class AzureDataLakeStorageClient : public DataLakeStorageClient { void resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name); Azure::Storage::Files::DataLake::DataLakeFileClient getFileClient(const PutAzureDataLakeStorageParameters& params); - std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; std::string connection_string_; std::string file_system_name_; std::unique_ptr client_; From e6f7df9e2ec84bca7f51ee358700b1eedbf2ddf3 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 18 Aug 2021 17:40:52 +0200 Subject: [PATCH 16/36] Use nested namespaces --- .../azure/processors/PutAzureDataLakeStorage.cpp | 14 ++------------ .../azure/processors/PutAzureDataLakeStorage.h | 14 ++------------ extensions/azure/storage/AzureDataLakeStorage.cpp | 14 ++------------ extensions/azure/storage/AzureDataLakeStorage.h | 14 ++------------ .../azure/storage/AzureDataLakeStorageClient.cpp | 14 
++------------ .../azure/storage/AzureDataLakeStorageClient.h | 14 ++------------ extensions/azure/storage/DataLakeStorageClient.h | 14 ++------------ 7 files changed, 14 insertions(+), 84 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 55b17450eb..1794404bde 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -24,12 +24,7 @@ #include "utils/gsl.h" #include "controllerservices/AzureStorageCredentialsService.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace processors { +namespace org::apache::nifi::minifi::azure::processors { const core::Property PutAzureDataLakeStorage::AzureStorageCredentialsService( core::PropertyBuilder::createProperty("Azure Storage Credentials Service") @@ -207,9 +202,4 @@ int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptr()) { @@ -48,9 +43,4 @@ UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataL return result; } -} // namespace storage -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::azure::storage diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index fbe1e41c03..d5154ee3f9 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -29,12 +29,7 @@ #include "core/logging/LoggerConfiguration.h" #include "AzureDataLakeStorageClient.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace storage { +namespace org::apache::nifi::minifi::azure::storage { struct UploadDataLakeStorageResult { std::string primary_uri; @@ -59,9 +54,4 @@ class AzureDataLakeStorage { std::unique_ptr 
data_lake_storage_client_; }; -} // namespace storage -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::azure::storage diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index 9f5dbdd952..bdfbb66deb 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -20,12 +20,7 @@ #include "AzureDataLakeStorageClient.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace storage { +namespace org::apache::nifi::minifi::azure::storage { void AzureDataLakeStorageClient::resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name) { if (client_ == nullptr || connection_string != connection_string || file_system_name_ != file_system_name) { @@ -61,9 +56,4 @@ std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorage return full_url; } -} // namespace storage -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::azure::storage diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h index 27840a76e6..4e6cd0d619 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.h +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -28,12 +28,7 @@ #include "core/logging/Logger.h" #include "core/logging/LoggerConfiguration.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace storage { +namespace org::apache::nifi::minifi::azure::storage { class AzureDataLakeStorageClient : public DataLakeStorageClient { public: @@ -62,9 +57,4 @@ class AzureDataLakeStorageClient : public DataLakeStorageClient { 
std::unique_ptr client_; }; -} // namespace storage -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::azure::storage diff --git a/extensions/azure/storage/DataLakeStorageClient.h b/extensions/azure/storage/DataLakeStorageClient.h index ab3da9bd6d..dc1e254abb 100644 --- a/extensions/azure/storage/DataLakeStorageClient.h +++ b/extensions/azure/storage/DataLakeStorageClient.h @@ -21,12 +21,7 @@ #include -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace storage { +namespace org::apache::nifi::minifi::azure::storage { struct PutAzureDataLakeStorageParameters { std::string connection_string; @@ -43,9 +38,4 @@ class DataLakeStorageClient { virtual ~DataLakeStorageClient() {} }; -} // namespace storage -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::azure::storage From d1b104a32533fc336c980dae3aa528326cf15e3c Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 19 Aug 2021 13:44:22 +0200 Subject: [PATCH 17/36] Fix formatting --- .../azure/processors/PutAzureDataLakeStorage.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 1794404bde..5e7fe6cf07 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -175,11 +175,11 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptr logger) - : flow_size_(flow_size) - , azure_data_lake_storage_(azure_data_lake_storage) - , params_(params) - , logger_(std::move(logger)) { + uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger) + : 
flow_size_(flow_size), + azure_data_lake_storage_(azure_data_lake_storage), + params_(params), + logger_(std::move(logger)) { } int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptr& stream) { From a587ce47e1c12fe52a47daba0cf749805e72a75c Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 19 Aug 2021 14:41:30 +0200 Subject: [PATCH 18/36] Move upload URL handling to AzureDataLakeStorage --- extensions/azure/storage/AzureDataLakeStorage.cpp | 3 +++ extensions/azure/storage/AzureDataLakeStorageClient.cpp | 6 +----- libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index bb91f61c7a..454aafeabe 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -37,6 +37,9 @@ UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataL } auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); + if (auto query_string_pos = upload_url.find('?'); query_string_pos != std::string::npos) { + upload_url = upload_url.substr(0, query_string_pos); + } UploadDataLakeStorageResult result; result.length = buffer_size; result.primary_uri = upload_url; diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index bdfbb66deb..9c3b49d628 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -49,11 +49,7 @@ bool AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParamet std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { auto file_client = getFileClient(params); file_client.UploadFrom(buffer, buffer_size); - auto 
full_url = file_client.GetUrl(); - if (auto query_string_pos = full_url.find('?'); query_string_pos != std::string::npos) { - return full_url.substr(0, query_string_pos); - } - return full_url; + return file_client.GetUrl(); } } // namespace org::apache::nifi::minifi::azure::storage diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 8e9db74ccb..a158000f71 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -196,7 +196,7 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Upload to Azure Data Lake REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filename value:" + GETFILE_FILE_NAME)); REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filesystem value:" + FILESYSTEM_NAME)); REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.length value:" + std::to_string(TEST_DATA.size()))); - REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.primaryUri value:" + mock_data_lake_storage_client_ptr_->PRIMARY_URI)); + REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.primaryUri value:" + mock_data_lake_storage_client_ptr_->PRIMARY_URI + "\n")); auto passed_params = mock_data_lake_storage_client_ptr_->getPassedParams(); REQUIRE(passed_params.connection_string == CONNECTION_STRING); REQUIRE(passed_params.file_system_name == FILESYSTEM_NAME); @@ -253,7 +253,7 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'repl REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filename value:" + GETFILE_FILE_NAME)); REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.filesystem value:" + FILESYSTEM_NAME)); REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.length value:" + std::to_string(TEST_DATA.size()))); - REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.primaryUri value:" + mock_data_lake_storage_client_ptr_->PRIMARY_URI)); + 
REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:azure.primaryUri value:" + mock_data_lake_storage_client_ptr_->PRIMARY_URI + "\n")); auto passed_params = mock_data_lake_storage_client_ptr_->getPassedParams(); REQUIRE(passed_params.connection_string == CONNECTION_STRING); REQUIRE(passed_params.file_system_name == FILESYSTEM_NAME); From 66d5c0b907ea19df80b1ac0689c05a55820a44d9 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 19 Aug 2021 15:28:43 +0200 Subject: [PATCH 19/36] Move error handling to AzureDataLakeStorage --- .../processors/PutAzureDataLakeStorage.cpp | 23 ++++---------- .../processors/PutAzureDataLakeStorage.h | 8 ++--- .../azure/storage/AzureDataLakeStorage.cpp | 31 ++++++++++++------- .../azure/storage/AzureDataLakeStorage.h | 13 ++++---- 4 files changed, 35 insertions(+), 40 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 5e7fe6cf07..24bf325397 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -144,7 +144,8 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrgetSize(), azure_data_lake_storage_, *params, logger_); session->read(flow_file, &callback); - if (callback.caughtFileAlreadyExistsError()) { + auto result = callback.getResult(); + if (result.result_code == storage::UploadResultCode::FILE_ALREADY_EXISTS) { gsl_Expects(conflict_resolution_strategy_ != FileExistsResolutionStrategy::REPLACE_FILE); if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::FAIL_FLOW) { logger_->log_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage because file already exists", @@ -157,18 +158,15 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrtransfer(flow_file, Success); return; } - } - - auto result = callback.getResult(); - if (!result) { + } else if (result.result_code == storage::UploadResultCode::FAILURE) { 
logger_->log_error("Failed to upload file '%s/%s' to filesystem '%s' on Azure Data Lake storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Failure); } else { session->putAttribute(flow_file, "azure.filesystem", params->file_system_name); session->putAttribute(flow_file, "azure.directory", params->directory_name); session->putAttribute(flow_file, "azure.filename", params->filename); - session->putAttribute(flow_file, "azure.primaryUri", result->primary_uri); - session->putAttribute(flow_file, "azure.length", std::to_string(result->length)); + session->putAttribute(flow_file, "azure.primaryUri", result.primary_uri); + session->putAttribute(flow_file, "azure.length", std::to_string(result.length)); logger_->log_debug("Successfully uploaded file '%s/%s' to filesystem '%s' on Azure Data Lake storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Success); } @@ -189,16 +187,7 @@ int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptrlog_warn(ex.what()); - caught_file_already_exists_error_ = true; - } catch(const std::runtime_error& err) { - logger_->log_error("A runtime error occurred while uploading file to Azure Data Lake storage: %s", err.what()); - return read_ret; - } - + result_ = azure_data_lake_storage_.uploadFile(params_, buffer.data(), flow_size_); return read_ret; } diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index a3d78f85f4..3fe34c568f 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -74,20 +74,16 @@ class PutAzureDataLakeStorage final : public core::Processor { ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger); int64_t process(const std::shared_ptr& 
stream) override; - std::optional getResult() const { + azure::storage::UploadDataLakeStorageResult getResult() const { return result_; } - bool caughtFileAlreadyExistsError() const { - return caught_file_already_exists_error_; - } - private: uint64_t flow_size_; storage::AzureDataLakeStorage& azure_data_lake_storage_; const storage::PutAzureDataLakeStorageParameters& params_; bool caught_file_already_exists_error_ = false; - std::optional result_ = std::nullopt; + azure::storage::UploadDataLakeStorageResult result_; std::shared_ptr logger_; }; diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index 454aafeabe..36f907f649 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -31,19 +31,28 @@ AzureDataLakeStorage::AzureDataLakeStorage(std::unique_ptrcreateFile(params); - if (!file_created && !params.replace_file) { - throw FileAlreadyExistsException(params); - } + UploadDataLakeStorageResult result; + try { + auto file_created = data_lake_storage_client_->createFile(params); + if (!file_created && !params.replace_file) { + logger_->log_warn("File '%s/%s' already exists on Azure Data Lake Storage filesystem '%s'", params.directory_name, params.filename, params.file_system_name); + result.result_code = UploadResultCode::FILE_ALREADY_EXISTS; + return result; + } - auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); - if (auto query_string_pos = upload_url.find('?'); query_string_pos != std::string::npos) { - upload_url = upload_url.substr(0, query_string_pos); + auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); + if (auto query_string_pos = upload_url.find('?'); query_string_pos != std::string::npos) { + upload_url = upload_url.substr(0, query_string_pos); + } + result.length = buffer_size; + result.primary_uri = upload_url; + return result; + } catch(const 
std::runtime_error& err) { + logger_->log_error("A runtime error occurred while uploading file to Azure Data Lake storage: %s", err.what()); + result.result_code = UploadResultCode::FAILURE; + return result; } - UploadDataLakeStorageResult result; - result.length = buffer_size; - result.primary_uri = upload_url; - return result; + } } // namespace org::apache::nifi::minifi::azure::storage diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index d5154ee3f9..fb8da2ec62 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -31,19 +31,20 @@ namespace org::apache::nifi::minifi::azure::storage { +enum class UploadResultCode { + SUCCESS, + FILE_ALREADY_EXISTS, + FAILURE +}; + struct UploadDataLakeStorageResult { + UploadResultCode result_code = UploadResultCode::SUCCESS; std::string primary_uri; std::size_t length; }; class AzureDataLakeStorage { public: - class FileAlreadyExistsException : public std::runtime_error { - public: - explicit FileAlreadyExistsException(const PutAzureDataLakeStorageParameters& params) - : std::runtime_error("File '" + params.directory_name + "/" + params.filename + "' already exists on Azure Data Lake Storage filesystem '" + params.file_system_name + "'") {} - }; - AzureDataLakeStorage(); explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client); From 2ad80df355fb2f25906bc5eeae3f6e51417caaaf Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Mon, 23 Aug 2021 11:01:16 +0200 Subject: [PATCH 20/36] Fix build issues --- extensions/azure/processors/PutAzureDataLakeStorage.h | 1 - extensions/azure/storage/AzureDataLakeStorage.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index 3fe34c568f..c2143e528d 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ 
b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -82,7 +82,6 @@ class PutAzureDataLakeStorage final : public core::Processor { uint64_t flow_size_; storage::AzureDataLakeStorage& azure_data_lake_storage_; const storage::PutAzureDataLakeStorageParameters& params_; - bool caught_file_already_exists_error_ = false; azure::storage::UploadDataLakeStorageResult result_; std::shared_ptr logger_; }; diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index 36f907f649..5c3e85ee23 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -52,7 +52,6 @@ UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataL result.result_code = UploadResultCode::FAILURE; return result; } - } } // namespace org::apache::nifi::minifi::azure::storage From c89db444bb71d6b85ec59f0274da0540c9963225 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 25 Aug 2021 11:25:05 +0200 Subject: [PATCH 21/36] Fix auto termination in test --- libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index a158000f71..8117fe8f6e 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -118,8 +118,7 @@ class PutAzureDataLakeStorageTestsFixture { putfile_ = plan_->addProcessor("PutFile", "PutFile", { {"success", "d"} }, false); plan_->addConnection(put_azure_data_lake_storage_, {"failure", "d"}, putfile_); - putfile_->setAutoTerminatedRelationships({{"success", "d"}}); - putfile_->setAutoTerminatedRelationships({{"failure", "d"}}); + putfile_->setAutoTerminatedRelationships({{"success", "d"}, {"failure", "d"}}); output_dir_ = test_controller_.createTempDirectory(); 
plan_->setProperty(putfile_, org::apache::nifi::minifi::processors::PutFile::Directory.getName(), output_dir_); From 329f7519db3fca1aa926a3a5c55f133f32986211 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 26 Aug 2021 12:18:59 +0200 Subject: [PATCH 22/36] Fix connectionstring bug --- extensions/azure/storage/AzureDataLakeStorageClient.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index 9c3b49d628..47be50b541 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -23,7 +23,7 @@ namespace org::apache::nifi::minifi::azure::storage { void AzureDataLakeStorageClient::resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name) { - if (client_ == nullptr || connection_string != connection_string || file_system_name_ != file_system_name) { + if (client_ == nullptr || connection_string_ != connection_string || file_system_name_ != file_system_name) { client_ = std::make_unique( Azure::Storage::Files::DataLake::DataLakeFileSystemClient::CreateFromConnectionString(connection_string, file_system_name)); file_system_name_ = file_system_name; From 866490742f610af5b99778015993a76bbe60960d Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Mon, 6 Sep 2021 16:05:27 +0200 Subject: [PATCH 23/36] Add new Azure processor to README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 117662c85b..de238954fc 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ Through JNI extensions you can run NiFi processors using NARs. The JNI extension | ------------- |:-------------| :-----| | Archive Extensions | [ApplyTemplate](PROCESSORS.md#applytemplate)
[CompressContent](PROCESSORS.md#compresscontent)
[ManipulateArchive](PROCESSORS.md#manipulatearchive)
[MergeContent](PROCESSORS.md#mergecontent)
[FocusArchiveEntry](PROCESSORS.md#focusarchiveentry)
[UnfocusArchiveEntry](PROCESSORS.md#unfocusarchiveentry) | -DBUILD_LIBARCHIVE=ON | | AWS | [AWSCredentialsService](CONTROLLERS.md#awscredentialsservice)
[PutS3Object](PROCESSORS.md#puts3object)
[DeleteS3Object](PROCESSORS.md#deletes3object)
[FetchS3Object](PROCESSORS.md#fetchs3object)
[ListS3](PROCESSORS.md#lists3) | -DENABLE_AWS=ON | -| Azure | [AzureStorageCredentialsService](CONTROLLERS.md#azurestoragecredentialsservice)
[PutAzureBlobStorage](PROCESSORS.md#putazureblobatorage) | -DENABLE_AZURE=ON | +| Azure | [AzureStorageCredentialsService](CONTROLLERS.md#azurestoragecredentialsservice)
[PutAzureBlobStorage](PROCESSORS.md#putazureblobstorage)
[PutAzureDataLakeStorage](PROCESSORS.md#putazuredatalakestorage) | -DENABLE_AZURE=ON | | CivetWeb | [ListenHTTP](PROCESSORS.md#listenhttp) | -DDISABLE_CIVET=ON | | CURL | [InvokeHTTP](PROCESSORS.md#invokehttp) | -DDISABLE_CURL=ON | | GPS | GetGPS | -DENABLE_GPS=ON | From 215bae6b6021d295882f74dd838da6db98b22ff1 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 10 Sep 2021 17:38:16 +0200 Subject: [PATCH 24/36] Improve test runtime --- libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 8117fe8f6e..a6d7230c46 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -237,7 +237,7 @@ TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Transfer to success on 'i REQUIRE(getFailedFlowFileContents().size() == 0); using org::apache::nifi::minifi::utils::verifyLogLinePresenceInPollTime; REQUIRE(verifyLogLinePresenceInPollTime(1s, "key:filename value:" + GETFILE_FILE_NAME)); - REQUIRE(!verifyLogLinePresenceInPollTime(0s, "key:azure")); + REQUIRE_FALSE(LogTestController::getInstance().contains("key:azure", 0s, 0ms)); } TEST_CASE_METHOD(PutAzureDataLakeStorageTestsFixture, "Replace old file on 'replace' resolution strategy if file exists", "[azureDataLakeStorageUpload]") { From c35d96f6d4e712b0d8248f6469cce42b13306bd4 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Mon, 13 Sep 2021 13:29:18 +0200 Subject: [PATCH 25/36] Update Azure SDK to version 12.2.0 --- cmake/BundledAzureSdkCpp.cmake | 4 ++-- .../azure-sdk-cpp-openssl-include-fix.patch | 14 ----------- .../azure-sdk-cpp-remove-samples.patch | 23 +++++++++++++++++++ 3 files changed, 25 insertions(+), 16 deletions(-) delete mode 100644 thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch create mode 100644 
thirdparty/azure-sdk-cpp/azure-sdk-cpp-remove-samples.patch diff --git a/cmake/BundledAzureSdkCpp.cmake b/cmake/BundledAzureSdkCpp.cmake index d3a31999a6..a6b12db25e 100644 --- a/cmake/BundledAzureSdkCpp.cmake +++ b/cmake/BundledAzureSdkCpp.cmake @@ -16,7 +16,7 @@ # under the License. function(use_bundled_libazure SOURCE_DIR BINARY_DIR) - set(PATCH_FILE "${SOURCE_DIR}/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch") + set(PATCH_FILE "${SOURCE_DIR}/thirdparty/azure-sdk-cpp/azure-sdk-cpp-remove-samples.patch") set(PC ${Bash_EXECUTABLE} -c "set -x && \ (\"${Patch_EXECUTABLE}\" -p1 -R -s -f --dry-run -i \"${PATCH_FILE}\" || \"${Patch_EXECUTABLE}\" -p1 -N -i \"${PATCH_FILE}\")") # Define byproducts @@ -53,7 +53,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) ExternalProject_Add( azure-sdk-cpp-external GIT_REPOSITORY "https://github.com/Azure/azure-sdk-for-cpp.git" - GIT_TAG "azure-storage-files-datalake_12.0.1" + GIT_TAG "azure-storage-files-datalake_12.2.0" BUILD_IN_SOURCE true SOURCE_DIR "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src" BUILD_BYPRODUCTS "${AZURESDK_LIBRARIES_LIST}" diff --git a/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch b/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch deleted file mode 100644 index 6050900028..0000000000 --- a/thirdparty/azure-sdk-cpp/azure-sdk-cpp-openssl-include-fix.patch +++ /dev/null @@ -1,14 +0,0 @@ -# Issue presented in https://github.com/Azure/azure-sdk-for-cpp/issues/2560 -diff --git a/sdk/core/azure-core/CMakeLists.txt b/sdk/core/azure-core/CMakeLists.txt -index 12f57af0..1d8f3398 100644 ---- a/sdk/core/azure-core/CMakeLists.txt -+++ b/sdk/core/azure-core/CMakeLists.txt -@@ -142,7 +142,7 @@ if(WIN32) - target_link_libraries(azure-core PRIVATE bcrypt crypt32) - else() - find_package(OpenSSL REQUIRED) -- target_link_libraries(azure-core PRIVATE OpenSSL::SSL) -+ target_link_libraries(azure-core PUBLIC OpenSSL::SSL) - endif() - - if(BUILD_TRANSPORT_CURL) diff --git 
a/thirdparty/azure-sdk-cpp/azure-sdk-cpp-remove-samples.patch b/thirdparty/azure-sdk-cpp/azure-sdk-cpp-remove-samples.patch new file mode 100644 index 0000000000..c9e5ea658b --- /dev/null +++ b/thirdparty/azure-sdk-cpp/azure-sdk-cpp-remove-samples.patch @@ -0,0 +1,23 @@ +# Samples require OpenSSL library on host that should not be required +diff --git a/sdk/identity/azure-identity/CMakeLists.txt b/sdk/identity/azure-identity/CMakeLists.txt +index 5a099b0e..ba8920dc 100644 +--- a/sdk/identity/azure-identity/CMakeLists.txt ++++ b/sdk/identity/azure-identity/CMakeLists.txt +@@ -92,6 +92,3 @@ if (BUILD_PERFORMANCE_TESTS) + add_subdirectory(test/perf) + endif() + +-if (AZ_ALL_LIBRARIES) +- add_subdirectory(samples) +-endif() + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 173bca57..e5e4e9a0 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -95,4 +95,3 @@ add_subdirectory(sdk/identity) + add_subdirectory(sdk/keyvault) + add_subdirectory(sdk/storage) + add_subdirectory(sdk/template) +-add_subdirectory(samples/integration/vcpkg-keyvault) + From 577e475b8b9f04072cd9ffdc50309c148b36a5ad Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Mon, 13 Sep 2021 17:33:51 +0200 Subject: [PATCH 26/36] Extract AzureStorageProcessor --- .../processors/AzureStorageProcessor.cpp | 56 +++++++++++++++++++ .../azure/processors/AzureStorageProcessor.h | 51 +++++++++++++++++ .../azure/processors/PutAzureBlobStorage.cpp | 42 +------------- .../azure/processors/PutAzureBlobStorage.h | 25 ++------- .../processors/PutAzureDataLakeStorage.cpp | 29 +--------- .../processors/PutAzureDataLakeStorage.h | 11 ++-- 6 files changed, 120 insertions(+), 94 deletions(-) create mode 100644 extensions/azure/processors/AzureStorageProcessor.cpp create mode 100644 extensions/azure/processors/AzureStorageProcessor.h diff --git a/extensions/azure/processors/AzureStorageProcessor.cpp b/extensions/azure/processors/AzureStorageProcessor.cpp new file mode 100644 index 0000000000..168e21c0cc --- /dev/null +++ 
b/extensions/azure/processors/AzureStorageProcessor.cpp @@ -0,0 +1,56 @@ +/** + * @file AzureStorageProcessor.cpp + * AzureStorageProcessor class implementation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AzureStorageProcessor.h" + +#include +#include + +#include "controllerservices/AzureStorageCredentialsService.h" + +namespace org::apache::nifi::minifi::azure::processors { + +const core::Property AzureStorageProcessor::AzureStorageCredentialsService( + core::PropertyBuilder::createProperty("Azure Storage Credentials Service") + ->withDescription("Name of the Azure Storage Credentials Service used to retrieve the connection string from.") + ->build()); + +std::string AzureStorageProcessor::getConnectionStringFromControllerService(const std::shared_ptr &context) const { + std::string service_name; + if (!context->getProperty(AzureStorageCredentialsService.getName(), service_name) || service_name.empty()) { + return ""; + } + + std::shared_ptr service = context->getControllerService(service_name); + if (nullptr == service) { + logger_->log_error("Azure Storage credentials service with name: '%s' could not be found", service_name.c_str()); + return ""; + } + + auto azure_credentials_service = 
std::dynamic_pointer_cast(service); + if (!azure_credentials_service) { + logger_->log_error("Controller service with name: '%s' is not an Azure Storage credentials service", service_name.c_str()); + return ""; + } + + return azure_credentials_service->getConnectionString(); +} + +} // namespace org::apache::nifi::minifi::azure::processors diff --git a/extensions/azure/processors/AzureStorageProcessor.h b/extensions/azure/processors/AzureStorageProcessor.h new file mode 100644 index 0000000000..ab289b4113 --- /dev/null +++ b/extensions/azure/processors/AzureStorageProcessor.h @@ -0,0 +1,51 @@ +/** + * @file AzureStorageProcessor.h + * AzureStorageProcessor class declaration + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include "core/Property.h" +#include "core/Processor.h" +#include "core/logging/Logger.h" +#include "core/logging/LoggerConfiguration.h" + +namespace org::apache::nifi::minifi::azure::processors { + +class AzureStorageProcessor : public core::Processor { + public: + // Supported Properties + static const core::Property AzureStorageCredentialsService; + + explicit AzureStorageProcessor(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr& logger) + : core::Processor(name, uuid), + logger_(logger) { + setSupportedProperties({AzureStorageCredentialsService}); + } + + ~AzureStorageProcessor() override = default; + + protected: + std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; + std::shared_ptr logger_; +}; + +} // namespace org::apache::nifi::minifi::azure::processors diff --git a/extensions/azure/processors/PutAzureBlobStorage.cpp b/extensions/azure/processors/PutAzureBlobStorage.cpp index 58ca4953eb..94931f25ee 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.cpp +++ b/extensions/azure/processors/PutAzureBlobStorage.cpp @@ -27,12 +27,7 @@ #include "controllerservices/AzureStorageCredentialsService.h" #include "core/Resource.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace processors { +namespace org::apache::nifi::minifi::azure::processors { const core::Property PutAzureBlobStorage::ContainerName( core::PropertyBuilder::createProperty("Container Name") @@ -40,10 +35,6 @@ const core::Property PutAzureBlobStorage::ContainerName( ->supportsExpressionLanguage(true) ->isRequired(true) ->build()); -const core::Property PutAzureBlobStorage::AzureStorageCredentialsService( - core::PropertyBuilder::createProperty("Azure Storage Credentials Service") - ->withDescription("Name of the Azure Storage Credentials Service used to retrieve the connection string from.") - ->build()); const 
core::Property PutAzureBlobStorage::StorageAccountName( core::PropertyBuilder::createProperty("Storage Account Name") ->withDescription("The storage account name.") @@ -91,14 +82,13 @@ const core::Relationship PutAzureBlobStorage::Failure("failure", "Unsuccessful o void PutAzureBlobStorage::initialize() { // Set the supported properties - setSupportedProperties({ + updateSupportedProperties({ ContainerName, StorageAccountName, StorageAccountKey, SASToken, CommonStorageAccountEndpointSuffix, ConnectionString, - AzureStorageCredentialsService, Blob, CreateContainer }); @@ -147,27 +137,6 @@ void PutAzureBlobStorage::onSchedule(const std::shared_ptr logger_->log_info("Using storage account name and SAS token for authentication"); } -std::string PutAzureBlobStorage::getConnectionStringFromControllerService(const std::shared_ptr &context) const { - std::string service_name; - if (!context->getProperty(AzureStorageCredentialsService.getName(), service_name) || service_name.empty()) { - return ""; - } - - std::shared_ptr service = context->getControllerService(service_name); - if (nullptr == service) { - logger_->log_error("Azure Storage credentials service with name: '%s' could not be found", service_name.c_str()); - return ""; - } - - auto azure_credentials_service = std::dynamic_pointer_cast(service); - if (!azure_credentials_service) { - logger_->log_error("Controller service with name: '%s' is not an Azure Storage credentials service", service_name.c_str()); - return ""; - } - - return azure_credentials_service->getConnectionString(); -} - std::string PutAzureBlobStorage::getAzureConnectionStringFromProperties( const std::shared_ptr &context, const std::shared_ptr &flow_file) { @@ -260,9 +229,4 @@ void PutAzureBlobStorage::onTrigger(const std::shared_ptr REGISTER_RESOURCE(PutAzureBlobStorage, "Puts content into an Azure Storage Blob"); -} // namespace processors -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace 
org +} // namespace org::apache::nifi::minifi::azure::processors diff --git a/extensions/azure/processors/PutAzureBlobStorage.h b/extensions/azure/processors/PutAzureBlobStorage.h index a712d0dcef..c07fbedaf6 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.h +++ b/extensions/azure/processors/PutAzureBlobStorage.h @@ -27,27 +27,19 @@ #include #include "core/Property.h" -#include "core/Processor.h" #include "core/logging/Logger.h" #include "core/logging/LoggerConfiguration.h" #include "storage/BlobStorage.h" +#include "AzureStorageProcessor.h" class PutAzureBlobStorageTestsFixture; -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace azure { -namespace processors { +namespace org::apache::nifi::minifi::azure::processors { -class PutAzureBlobStorage : public core::Processor { +class PutAzureBlobStorage final : public AzureStorageProcessor { public: - static constexpr char const* ProcessorName = "PutAzureBlobStorage"; - // Supported Properties static const core::Property ContainerName; - static const core::Property AzureStorageCredentialsService; static const core::Property StorageAccountName; static const core::Property StorageAccountKey; static const core::Property SASToken; @@ -107,11 +99,10 @@ class PutAzureBlobStorage : public core::Processor { friend class ::PutAzureBlobStorageTestsFixture; explicit PutAzureBlobStorage(const std::string& name, const minifi::utils::Identifier& uuid, std::unique_ptr blob_storage_wrapper) - : core::Processor(name, uuid) + : AzureStorageProcessor(name, uuid, logging::LoggerFactory::getLogger()) , blob_storage_wrapper_(std::move(blob_storage_wrapper)) { } - std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; static std::string getAzureConnectionStringFromProperties( const std::shared_ptr &context, const std::shared_ptr &flow_file); @@ -123,12 +114,6 @@ class PutAzureBlobStorage : public core::Processor { std::mutex azure_storage_mutex_; 
std::unique_ptr blob_storage_wrapper_; bool create_container_ = false; - std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; }; -} // namespace processors -} // namespace azure -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::azure::processors diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 24bf325397..458d1031ba 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -26,11 +26,6 @@ namespace org::apache::nifi::minifi::azure::processors { -const core::Property PutAzureDataLakeStorage::AzureStorageCredentialsService( - core::PropertyBuilder::createProperty("Azure Storage Credentials Service") - ->withDescription("Name of the Azure Storage Credentials Service used to retrieve the connection string from.") - ->isRequired(true) - ->build()); const core::Property PutAzureDataLakeStorage::FilesystemName( core::PropertyBuilder::createProperty("Filesystem Name") ->withDescription("Name of the Azure Storage File System. 
It is assumed to be already existing.") @@ -61,8 +56,7 @@ const core::Relationship PutAzureDataLakeStorage::Failure("failure", "Files that void PutAzureDataLakeStorage::initialize() { // Set the supported properties - setSupportedProperties({ - AzureStorageCredentialsService, + updateSupportedProperties({ FilesystemName, DirectoryName, FileName, @@ -75,27 +69,6 @@ void PutAzureDataLakeStorage::initialize() { }); } -std::string PutAzureDataLakeStorage::getConnectionStringFromControllerService(const std::shared_ptr &context) const { - std::string service_name; - if (!context->getProperty(AzureStorageCredentialsService.getName(), service_name) || service_name.empty()) { - return ""; - } - - auto service = context->getControllerService(service_name); - if (nullptr == service) { - logger_->log_error("Azure Storage credentials service with name: '%s' could not be found", service_name.c_str()); - return ""; - } - - auto azure_credentials_service = std::dynamic_pointer_cast(service); - if (!azure_credentials_service) { - logger_->log_error("Controller service with name: '%s' is not an Azure Storage credentials service", service_name.c_str()); - return ""; - } - - return azure_credentials_service->getConnectionString(); -} - void PutAzureDataLakeStorage::onSchedule(const std::shared_ptr& context, const std::shared_ptr& /*sessionFactory*/) { connection_string_ = getConnectionStringFromControllerService(context); if (connection_string_.empty()) { diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index c2143e528d..7a672f89da 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -27,20 +27,19 @@ #include #include "core/Property.h" -#include "core/Processor.h" #include "core/logging/Logger.h" #include "core/logging/LoggerConfiguration.h" #include "storage/AzureDataLakeStorage.h" #include "utils/Enum.h" +#include 
"AzureStorageProcessor.h" class PutAzureDataLakeStorageTestsFixture; namespace org::apache::nifi::minifi::azure::processors { -class PutAzureDataLakeStorage final : public core::Processor { +class PutAzureDataLakeStorage final : public AzureStorageProcessor { public: // Supported Properties - static const core::Property AzureStorageCredentialsService; static const core::Property FilesystemName; static const core::Property DirectoryName; static const core::Property FileName; @@ -57,7 +56,7 @@ class PutAzureDataLakeStorage final : public core::Processor { ) explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid = minifi::utils::Identifier()) - : core::Processor(name, uuid) { + : PutAzureDataLakeStorage(name, uuid, nullptr) { } ~PutAzureDataLakeStorage() override = default; @@ -91,14 +90,12 @@ class PutAzureDataLakeStorage final : public core::Processor { } explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid, std::unique_ptr data_lake_storage_client) - : core::Processor(name, uuid), + : AzureStorageProcessor(name, uuid, logging::LoggerFactory::getLogger()), azure_data_lake_storage_(std::move(data_lake_storage_client)) { } - std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; std::optional buildUploadParameters(const std::shared_ptr& context, const std::shared_ptr& flow_file); - std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; std::string connection_string_; FileExistsResolutionStrategy conflict_resolution_strategy_; storage::AzureDataLakeStorage azure_data_lake_storage_; From 20d5038cbc0f2a070a7707e54bfc0e1b8d80ca1a Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Tue, 14 Sep 2021 14:40:10 +0200 Subject: [PATCH 27/36] Fix windows build --- cmake/BundledAzureSdkCpp.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/BundledAzureSdkCpp.cmake b/cmake/BundledAzureSdkCpp.cmake index a6b12db25e..edc9ae1841 
100644 --- a/cmake/BundledAzureSdkCpp.cmake +++ b/cmake/BundledAzureSdkCpp.cmake @@ -89,7 +89,7 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) target_include_directories(AZURE::azure-core INTERFACE ${LIBAZURE_INCLUDE_DIRS}) target_link_libraries(AZURE::azure-core INTERFACE CURL::libcurl OpenSSL::Crypto OpenSSL::SSL) if (WIN32) - target_link_libraries(AZURE::azure-core INTERFACE winhttp.lib) + target_link_libraries(AZURE::azure-core INTERFACE winhttp.lib WebServices.lib) endif() add_library(AZURE::azure-identity STATIC IMPORTED) From d0de4e9f9e5313f5d88adc1bcd0995f8aca2c454 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Tue, 14 Sep 2021 15:36:11 +0200 Subject: [PATCH 28/36] Check for additional problems in case of upload failure --- extensions/azure/storage/AzureDataLakeStorage.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index 5c3e85ee23..a1ae2fb8f1 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -47,8 +47,8 @@ UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataL result.length = buffer_size; result.primary_uri = upload_url; return result; - } catch(const std::runtime_error& err) { - logger_->log_error("A runtime error occurred while uploading file to Azure Data Lake storage: %s", err.what()); + } catch(const std::exception& ex) { + logger_->log_error("An exception occurred while uploading file to Azure Data Lake storage: %s", ex.what()); result.result_code = UploadResultCode::FAILURE; return result; } From 25a4e708245f65f3fc896098deb4cabaa0c27543 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 15 Sep 2021 12:02:19 +0200 Subject: [PATCH 29/36] Fix filename description --- extensions/azure/processors/PutAzureDataLakeStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 458d1031ba..080dceaa83 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -40,7 +40,7 @@ const core::Property PutAzureDataLakeStorage::DirectoryName( ->build()); const core::Property PutAzureDataLakeStorage::FileName( core::PropertyBuilder::createProperty("File Name") - ->withDescription("The filename") + ->withDescription("The filename to be uploaded. If left empty the filename attribute will be used by default.") ->supportsExpressionLanguage(true) ->build()); const core::Property PutAzureDataLakeStorage::ConflictResolutionStrategy( From b9d4a2f371798572bb57e56bd48d2421f770f684 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Wed, 15 Sep 2021 13:13:05 +0200 Subject: [PATCH 30/36] Incorporate review comments --- ...Processor.cpp => AzureStorageProcessorBase.cpp} | 10 +++++----- ...rageProcessor.h => AzureStorageProcessorBase.h} | 10 +++++----- .../azure/processors/PutAzureBlobStorage.cpp | 4 ++-- extensions/azure/processors/PutAzureBlobStorage.h | 14 +++++++------- .../azure/processors/PutAzureDataLakeStorage.cpp | 2 +- .../azure/processors/PutAzureDataLakeStorage.h | 12 +++++------- extensions/azure/storage/AzureDataLakeStorage.cpp | 1 - extensions/azure/storage/AzureDataLakeStorage.h | 3 +-- .../azure-tests/PutAzureDataLakeStorageTests.cpp | 5 ++--- 9 files changed, 28 insertions(+), 33 deletions(-) rename extensions/azure/processors/{AzureStorageProcessor.cpp => AzureStorageProcessorBase.cpp} (85%) rename extensions/azure/processors/{AzureStorageProcessor.h => AzureStorageProcessorBase.h} (81%) diff --git a/extensions/azure/processors/AzureStorageProcessor.cpp b/extensions/azure/processors/AzureStorageProcessorBase.cpp similarity index 85% rename from extensions/azure/processors/AzureStorageProcessor.cpp rename to 
extensions/azure/processors/AzureStorageProcessorBase.cpp index 168e21c0cc..42984fb695 100644 --- a/extensions/azure/processors/AzureStorageProcessor.cpp +++ b/extensions/azure/processors/AzureStorageProcessorBase.cpp @@ -1,6 +1,6 @@ /** - * @file AzureStorageProcessor.cpp - * AzureStorageProcessor class implementation + * @file AzureStorageProcessorBase.cpp + * AzureStorageProcessorBase class implementation * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -18,7 +18,7 @@ * limitations under the License. */ -#include "AzureStorageProcessor.h" +#include "AzureStorageProcessorBase.h" #include #include @@ -27,12 +27,12 @@ namespace org::apache::nifi::minifi::azure::processors { -const core::Property AzureStorageProcessor::AzureStorageCredentialsService( +const core::Property AzureStorageProcessorBase::AzureStorageCredentialsService( core::PropertyBuilder::createProperty("Azure Storage Credentials Service") ->withDescription("Name of the Azure Storage Credentials Service used to retrieve the connection string from.") ->build()); -std::string AzureStorageProcessor::getConnectionStringFromControllerService(const std::shared_ptr &context) const { +std::string AzureStorageProcessorBase::getConnectionStringFromControllerService(const std::shared_ptr &context) const { std::string service_name; if (!context->getProperty(AzureStorageCredentialsService.getName(), service_name) || service_name.empty()) { return ""; diff --git a/extensions/azure/processors/AzureStorageProcessor.h b/extensions/azure/processors/AzureStorageProcessorBase.h similarity index 81% rename from extensions/azure/processors/AzureStorageProcessor.h rename to extensions/azure/processors/AzureStorageProcessorBase.h index ab289b4113..974e61a3b7 100644 --- a/extensions/azure/processors/AzureStorageProcessor.h +++ b/extensions/azure/processors/AzureStorageProcessorBase.h @@ -1,6 +1,6 @@ /** - * @file 
AzureStorageProcessor.h - * AzureStorageProcessor class declaration + * @file AzureStorageProcessorBase.h + * AzureStorageProcessorBase class declaration * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -30,18 +30,18 @@ namespace org::apache::nifi::minifi::azure::processors { -class AzureStorageProcessor : public core::Processor { +class AzureStorageProcessorBase : public core::Processor { public: // Supported Properties static const core::Property AzureStorageCredentialsService; - explicit AzureStorageProcessor(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr& logger) + AzureStorageProcessorBase(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr& logger) : core::Processor(name, uuid), logger_(logger) { setSupportedProperties({AzureStorageCredentialsService}); } - ~AzureStorageProcessor() override = default; + ~AzureStorageProcessorBase() override = default; protected: std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; diff --git a/extensions/azure/processors/PutAzureBlobStorage.cpp b/extensions/azure/processors/PutAzureBlobStorage.cpp index 94931f25ee..31ed1aaf84 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.cpp +++ b/extensions/azure/processors/PutAzureBlobStorage.cpp @@ -140,7 +140,7 @@ void PutAzureBlobStorage::onSchedule(const std::shared_ptr std::string PutAzureBlobStorage::getAzureConnectionStringFromProperties( const std::shared_ptr &context, const std::shared_ptr &flow_file) { - azure::storage::AzureStorageCredentials credentials; + storage::AzureStorageCredentials credentials; context->getProperty(StorageAccountName, credentials.storage_account_name, flow_file); context->getProperty(StorageAccountKey, credentials.storage_account_key, flow_file); context->getProperty(SASToken, credentials.sas_token, flow_file); @@ -199,7 +199,7 @@ void 
PutAzureBlobStorage::onTrigger(const std::shared_ptr return; } - std::optional upload_result; + std::optional upload_result; { std::lock_guard lock(azure_storage_mutex_); createAzureStorageClient(connection_string, container_name); diff --git a/extensions/azure/processors/PutAzureBlobStorage.h b/extensions/azure/processors/PutAzureBlobStorage.h index c07fbedaf6..d758626be4 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.h +++ b/extensions/azure/processors/PutAzureBlobStorage.h @@ -30,13 +30,13 @@ #include "core/logging/Logger.h" #include "core/logging/LoggerConfiguration.h" #include "storage/BlobStorage.h" -#include "AzureStorageProcessor.h" +#include "AzureStorageProcessorBase.h" class PutAzureBlobStorageTestsFixture; namespace org::apache::nifi::minifi::azure::processors { -class PutAzureBlobStorage final : public AzureStorageProcessor { +class PutAzureBlobStorage final : public AzureStorageProcessorBase { public: // Supported Properties static const core::Property ContainerName; @@ -64,7 +64,7 @@ class PutAzureBlobStorage final : public AzureStorageProcessor { class ReadCallback : public InputStreamCallback { public: - ReadCallback(uint64_t flow_size, azure::storage::BlobStorage& blob_storage_wrapper, const std::string &blob_name) + ReadCallback(uint64_t flow_size, storage::BlobStorage& blob_storage_wrapper, const std::string &blob_name) : flow_size_(flow_size) , blob_storage_wrapper_(blob_storage_wrapper) , blob_name_(blob_name) { @@ -84,22 +84,22 @@ class PutAzureBlobStorage final : public AzureStorageProcessor { return result_->length; } - std::optional getResult() const { + std::optional getResult() const { return result_; } private: uint64_t flow_size_; - azure::storage::BlobStorage &blob_storage_wrapper_; + storage::BlobStorage &blob_storage_wrapper_; std::string blob_name_; - std::optional result_ = std::nullopt; + std::optional result_ = std::nullopt; }; private: friend class ::PutAzureBlobStorageTestsFixture; explicit 
PutAzureBlobStorage(const std::string& name, const minifi::utils::Identifier& uuid, std::unique_ptr blob_storage_wrapper) - : AzureStorageProcessor(name, uuid, logging::LoggerFactory::getLogger()) + : AzureStorageProcessorBase(name, uuid, logging::LoggerFactory::getLogger()) , blob_storage_wrapper_(std::move(blob_storage_wrapper)) { } diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 080dceaa83..4a94e46325 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -139,7 +139,7 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrputAttribute(flow_file, "azure.directory", params->directory_name); session->putAttribute(flow_file, "azure.filename", params->filename); session->putAttribute(flow_file, "azure.primaryUri", result.primary_uri); - session->putAttribute(flow_file, "azure.length", std::to_string(result.length)); + session->putAttribute(flow_file, "azure.length", std::to_string(flow_file->getSize())); logger_->log_debug("Successfully uploaded file '%s/%s' to filesystem '%s' on Azure Data Lake storage", params->directory_name, params->filename, params->file_system_name); session->transfer(flow_file, Success); } diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.h b/extensions/azure/processors/PutAzureDataLakeStorage.h index 7a672f89da..d806af387a 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.h +++ b/extensions/azure/processors/PutAzureDataLakeStorage.h @@ -31,13 +31,13 @@ #include "core/logging/LoggerConfiguration.h" #include "storage/AzureDataLakeStorage.h" #include "utils/Enum.h" -#include "AzureStorageProcessor.h" +#include "AzureStorageProcessorBase.h" class PutAzureDataLakeStorageTestsFixture; namespace org::apache::nifi::minifi::azure::processors { -class PutAzureDataLakeStorage final : public AzureStorageProcessor { +class PutAzureDataLakeStorage final : 
public AzureStorageProcessorBase { public: // Supported Properties static const core::Property FilesystemName; @@ -59,8 +59,6 @@ class PutAzureDataLakeStorage final : public AzureStorageProcessor { : PutAzureDataLakeStorage(name, uuid, nullptr) { } - ~PutAzureDataLakeStorage() override = default; - void initialize() override; void onSchedule(const std::shared_ptr &context, const std::shared_ptr &sessionFactory) override; void onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) override; @@ -73,7 +71,7 @@ class PutAzureDataLakeStorage final : public AzureStorageProcessor { ReadCallback(uint64_t flow_size, storage::AzureDataLakeStorage& azure_data_lake_storage, const storage::PutAzureDataLakeStorageParameters& params, std::shared_ptr logger); int64_t process(const std::shared_ptr& stream) override; - azure::storage::UploadDataLakeStorageResult getResult() const { + storage::UploadDataLakeStorageResult getResult() const { return result_; } @@ -81,7 +79,7 @@ class PutAzureDataLakeStorage final : public AzureStorageProcessor { uint64_t flow_size_; storage::AzureDataLakeStorage& azure_data_lake_storage_; const storage::PutAzureDataLakeStorageParameters& params_; - azure::storage::UploadDataLakeStorageResult result_; + storage::UploadDataLakeStorageResult result_; std::shared_ptr logger_; }; @@ -90,7 +88,7 @@ class PutAzureDataLakeStorage final : public AzureStorageProcessor { } explicit PutAzureDataLakeStorage(const std::string& name, const minifi::utils::Identifier& uuid, std::unique_ptr data_lake_storage_client) - : AzureStorageProcessor(name, uuid, logging::LoggerFactory::getLogger()), + : AzureStorageProcessorBase(name, uuid, logging::LoggerFactory::getLogger()), azure_data_lake_storage_(std::move(data_lake_storage_client)) { } diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index a1ae2fb8f1..b683c69eef 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ 
b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -44,7 +44,6 @@ UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataL if (auto query_string_pos = upload_url.find('?'); query_string_pos != std::string::npos) { upload_url = upload_url.substr(0, query_string_pos); } - result.length = buffer_size; result.primary_uri = upload_url; return result; } catch(const std::exception& ex) { diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index fb8da2ec62..201a072778 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -40,7 +40,6 @@ enum class UploadResultCode { struct UploadDataLakeStorageResult { UploadResultCode result_code = UploadResultCode::SUCCESS; std::string primary_uri; - std::size_t length; }; class AzureDataLakeStorage { @@ -48,7 +47,7 @@ class AzureDataLakeStorage { AzureDataLakeStorage(); explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client); - azure::storage::UploadDataLakeStorageResult uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); + storage::UploadDataLakeStorageResult uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); private: std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index a6d7230c46..42bb604be0 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -18,6 +18,7 @@ #include "../TestBase.h" #include "utils/IntegrationTestUtils.h" +#include "utils/TestUtils.h" #include "core/Processor.h" #include "processors/PutAzureDataLakeStorage.h" #include "processors/GetFile.h" @@ -103,9 +104,7 @@ class 
PutAzureDataLakeStorageTestsFixture { put_azure_data_lake_storage_ = std::shared_ptr( new minifi::azure::processors::PutAzureDataLakeStorage("PutAzureDataLakeStorage", utils::Identifier(), std::move(mock_data_lake_storage_client))); auto input_dir = test_controller_.createTempDirectory(); - std::ofstream input_file_stream(input_dir + utils::file::FileUtils::get_separator() + GETFILE_FILE_NAME); - input_file_stream << TEST_DATA; - input_file_stream.close(); + utils::putFileToDir(input_dir, GETFILE_FILE_NAME, TEST_DATA); get_file_ = plan_->addProcessor("GetFile", "GetFile"); plan_->setProperty(get_file_, processors::GetFile::Directory.getName(), input_dir); From 7a11d43d683b631f508987258cb3b85cc63aa384 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 16 Sep 2021 12:58:15 +0200 Subject: [PATCH 31/36] Fix after rebase --- .../AzureStorageCredentialsService.h | 11 ++++++----- .../azure/processors/PutAzureDataLakeStorage.cpp | 3 +++ .../azure/processors/PutAzureDataLakeStorage.h | 15 +++++++-------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/extensions/azure/controllerservices/AzureStorageCredentialsService.h b/extensions/azure/controllerservices/AzureStorageCredentialsService.h index 40aa8433a7..8365c86579 100644 --- a/extensions/azure/controllerservices/AzureStorageCredentialsService.h +++ b/extensions/azure/controllerservices/AzureStorageCredentialsService.h @@ -24,6 +24,7 @@ #include "core/controller/ControllerService.h" #include "core/logging/LoggerConfiguration.h" #include "storage/AzureStorageCredentials.h" +#include "utils/Export.h" namespace org { namespace apache { @@ -34,11 +35,11 @@ namespace controllers { class AzureStorageCredentialsService : public core::controller::ControllerService { public: - static const core::Property StorageAccountName; - static const core::Property StorageAccountKey; - static const core::Property SASToken; - static const core::Property CommonStorageAccountEndpointSuffix; - static const core::Property 
ConnectionString; + EXTENSIONAPI static const core::Property StorageAccountName; + EXTENSIONAPI static const core::Property StorageAccountKey; + EXTENSIONAPI static const core::Property SASToken; + EXTENSIONAPI static const core::Property CommonStorageAccountEndpointSuffix; + EXTENSIONAPI static const core::Property ConnectionString; explicit AzureStorageCredentialsService(const std::string& name, const minifi::utils::Identifier& uuid = {}) : ControllerService(name, uuid), diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 4a94e46325..2b05b773fe 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -23,6 +23,7 @@ #include "utils/ProcessorConfigUtils.h" #include "utils/gsl.h" #include "controllerservices/AzureStorageCredentialsService.h" +#include "core/Resource.h" namespace org::apache::nifi::minifi::azure::processors { @@ -164,4 +165,6 @@ int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptr Date: Thu, 16 Sep 2021 13:36:59 +0200 Subject: [PATCH 32/36] Incorporate review comments --- cmake/BundledAzureSdkCpp.cmake | 4 ++-- extensions/aws/processors/DeleteS3Object.cpp | 5 +++-- extensions/aws/processors/FetchS3Object.cpp | 5 +++-- extensions/aws/processors/ListS3.cpp | 6 ++++-- extensions/aws/processors/PutS3Object.cpp | 7 ++++--- extensions/aws/processors/S3Processor.cpp | 7 +------ .../processors/AzureStorageProcessorBase.cpp | 4 ++-- .../processors/AzureStorageProcessorBase.h | 4 ++-- .../azure/processors/PutAzureBlobStorage.cpp | 5 +++-- .../azure/processors/PutAzureBlobStorage.h | 5 ++--- .../processors/PutAzureDataLakeStorage.cpp | 18 ++++++++++++------ .../azure/storage/AzureDataLakeStorage.cpp | 4 ---- .../azure/storage/AzureDataLakeStorage.h | 3 +-- 13 files changed, 39 insertions(+), 38 deletions(-) diff --git a/cmake/BundledAzureSdkCpp.cmake b/cmake/BundledAzureSdkCpp.cmake 
index edc9ae1841..05bea7258a 100644 --- a/cmake/BundledAzureSdkCpp.cmake +++ b/cmake/BundledAzureSdkCpp.cmake @@ -52,8 +52,8 @@ function(use_bundled_libazure SOURCE_DIR BINARY_DIR) # Build project ExternalProject_Add( azure-sdk-cpp-external - GIT_REPOSITORY "https://github.com/Azure/azure-sdk-for-cpp.git" - GIT_TAG "azure-storage-files-datalake_12.2.0" + URL https://github.com/Azure/azure-sdk-for-cpp/archive/refs/tags/azure-storage-files-datalake_12.2.0.tar.gz + URL_HASH "SHA256=d4e80ea5e786dc689ddd04825d97ab91f5e1ef2787fa88a3d5ee00f0b820433f" BUILD_IN_SOURCE true SOURCE_DIR "${BINARY_DIR}/thirdparty/azure-sdk-cpp-src" BUILD_BYPRODUCTS "${AZURESDK_LIBRARIES_LIST}" diff --git a/extensions/aws/processors/DeleteS3Object.cpp b/extensions/aws/processors/DeleteS3Object.cpp index 2159e8a408..c16a28ae38 100644 --- a/extensions/aws/processors/DeleteS3Object.cpp +++ b/extensions/aws/processors/DeleteS3Object.cpp @@ -48,7 +48,8 @@ const core::Relationship DeleteS3Object::Failure("failure", "FlowFiles are route void DeleteS3Object::initialize() { // Add new supported properties - updateSupportedProperties({ObjectKey, Version}); + setSupportedProperties({Bucket, AccessKey, SecretKey, CredentialsFile, CredentialsFile, AWSCredentialsProviderService, Region, CommunicationsTimeout, + EndpointOverrideURL, ProxyHost, ProxyPort, ProxyUsername, ProxyPassword, UseDefaultCredentials, ObjectKey, Version}); // Set the supported relationships setSupportedRelationships({Failure, Success}); } @@ -74,7 +75,7 @@ std::optional DeleteS3Object::buildDelet } void DeleteS3Object::onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) { - logger_->log_debug("DeleteS3Object onTrigger"); + logger_->log_trace("DeleteS3Object onTrigger"); std::shared_ptr flow_file = session->get(); if (!flow_file) { context->yield(); diff --git a/extensions/aws/processors/FetchS3Object.cpp b/extensions/aws/processors/FetchS3Object.cpp index 8bf25aa831..c7de9058f6 100644 --- 
a/extensions/aws/processors/FetchS3Object.cpp +++ b/extensions/aws/processors/FetchS3Object.cpp @@ -55,7 +55,8 @@ const core::Relationship FetchS3Object::Failure("failure", "FlowFiles are routed void FetchS3Object::initialize() { // Add new supported properties - updateSupportedProperties({ObjectKey, Version, RequesterPays}); + setSupportedProperties({Bucket, AccessKey, SecretKey, CredentialsFile, CredentialsFile, AWSCredentialsProviderService, Region, CommunicationsTimeout, + EndpointOverrideURL, ProxyHost, ProxyPort, ProxyUsername, ProxyPassword, UseDefaultCredentials, ObjectKey, Version, RequesterPays}); // Set the supported relationships setSupportedRelationships({Failure, Success}); } @@ -89,7 +90,7 @@ std::optional FetchS3Object::buildFetchS3Re } void FetchS3Object::onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) { - logger_->log_debug("FetchS3Object onTrigger"); + logger_->log_trace("FetchS3Object onTrigger"); std::shared_ptr flow_file = session->get(); if (!flow_file) { context->yield(); diff --git a/extensions/aws/processors/ListS3.cpp b/extensions/aws/processors/ListS3.cpp index 2ec75de0b7..b5cc1a75f1 100644 --- a/extensions/aws/processors/ListS3.cpp +++ b/extensions/aws/processors/ListS3.cpp @@ -83,7 +83,9 @@ const core::Relationship ListS3::Success("success", "FlowFiles are routed to suc void ListS3::initialize() { // Add new supported properties - updateSupportedProperties({Delimiter, Prefix, UseVersions, MinimumObjectAge, WriteObjectTags, WriteUserMetadata, RequesterPays}); + setSupportedProperties({Bucket, AccessKey, SecretKey, CredentialsFile, CredentialsFile, AWSCredentialsProviderService, Region, CommunicationsTimeout, + EndpointOverrideURL, ProxyHost, ProxyPort, ProxyUsername, ProxyPassword, UseDefaultCredentials, Delimiter, Prefix, UseVersions, + MinimumObjectAge, WriteObjectTags, WriteUserMetadata, RequesterPays}); // Set the supported relationships setSupportedRelationships({Success}); } @@ -243,7 +245,7 @@ void 
ListS3::createNewFlowFile( } void ListS3::onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) { - logger_->log_debug("ListS3 onTrigger"); + logger_->log_trace("ListS3 onTrigger"); auto aws_results = s3_wrapper_.listBucket(*list_request_params_); if (!aws_results) { diff --git a/extensions/aws/processors/PutS3Object.cpp b/extensions/aws/processors/PutS3Object.cpp index 093850b905..f20d5b5002 100644 --- a/extensions/aws/processors/PutS3Object.cpp +++ b/extensions/aws/processors/PutS3Object.cpp @@ -107,8 +107,9 @@ const core::Relationship PutS3Object::Failure("failure", "FlowFiles are routed t void PutS3Object::initialize() { // Add new supported properties - updateSupportedProperties({ObjectKey, ContentType, StorageClass, FullControlUserList, ReadPermissionUserList, - ReadACLUserList, WriteACLUserList, CannedACL, ServerSideEncryption}); + setSupportedProperties({Bucket, AccessKey, SecretKey, CredentialsFile, CredentialsFile, AWSCredentialsProviderService, Region, CommunicationsTimeout, + EndpointOverrideURL, ProxyHost, ProxyPort, ProxyUsername, ProxyPassword, UseDefaultCredentials, ObjectKey, ContentType, StorageClass, + FullControlUserList, ReadPermissionUserList, ReadACLUserList, WriteACLUserList, CannedACL, ServerSideEncryption}); // Set the supported relationships setSupportedRelationships({Failure, Success}); } @@ -257,7 +258,7 @@ void PutS3Object::setAttributes( } void PutS3Object::onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) { - logger_->log_debug("PutS3Object onTrigger"); + logger_->log_trace("PutS3Object onTrigger"); std::shared_ptr flow_file = session->get(); if (!flow_file) { context->yield(); diff --git a/extensions/aws/processors/S3Processor.cpp b/extensions/aws/processors/S3Processor.cpp index c720860087..52b47a2e81 100644 --- a/extensions/aws/processors/S3Processor.cpp +++ b/extensions/aws/processors/S3Processor.cpp @@ -115,18 +115,13 @@ const core::Property S3Processor::UseDefaultCredentials( 
->build()); S3Processor::S3Processor(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr &logger) - : core::Processor(name, uuid) - , logger_(logger) { - setSupportedProperties({Bucket, AccessKey, SecretKey, CredentialsFile, CredentialsFile, AWSCredentialsProviderService, Region, CommunicationsTimeout, - EndpointOverrideURL, ProxyHost, ProxyPort, ProxyUsername, ProxyPassword, UseDefaultCredentials}); + : S3Processor(name, uuid, logger, nullptr) { } S3Processor::S3Processor(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr &logger, std::unique_ptr s3_request_sender) : core::Processor(name, uuid) , logger_(logger) , s3_wrapper_(std::move(s3_request_sender)) { - setSupportedProperties({Bucket, AccessKey, SecretKey, CredentialsFile, CredentialsFile, AWSCredentialsProviderService, Region, CommunicationsTimeout, - EndpointOverrideURL, ProxyHost, ProxyPort, ProxyUsername, ProxyPassword, UseDefaultCredentials}); } std::optional S3Processor::getAWSCredentialsFromControllerService(const std::shared_ptr &context) const { diff --git a/extensions/azure/processors/AzureStorageProcessorBase.cpp b/extensions/azure/processors/AzureStorageProcessorBase.cpp index 42984fb695..bbbedbae09 100644 --- a/extensions/azure/processors/AzureStorageProcessorBase.cpp +++ b/extensions/azure/processors/AzureStorageProcessorBase.cpp @@ -40,13 +40,13 @@ std::string AzureStorageProcessorBase::getConnectionStringFromControllerService( std::shared_ptr service = context->getControllerService(service_name); if (nullptr == service) { - logger_->log_error("Azure Storage credentials service with name: '%s' could not be found", service_name.c_str()); + logger_->log_error("Azure Storage credentials service with name: '%s' could not be found", service_name); return ""; } auto azure_credentials_service = std::dynamic_pointer_cast(service); if (!azure_credentials_service) { - logger_->log_error("Controller service with name: '%s' is not an 
Azure Storage credentials service", service_name.c_str()); + logger_->log_error("Controller service with name: '%s' is not an Azure Storage credentials service", service_name); return ""; } diff --git a/extensions/azure/processors/AzureStorageProcessorBase.h b/extensions/azure/processors/AzureStorageProcessorBase.h index 974e61a3b7..870975781b 100644 --- a/extensions/azure/processors/AzureStorageProcessorBase.h +++ b/extensions/azure/processors/AzureStorageProcessorBase.h @@ -26,7 +26,6 @@ #include "core/Property.h" #include "core/Processor.h" #include "core/logging/Logger.h" -#include "core/logging/LoggerConfiguration.h" namespace org::apache::nifi::minifi::azure::processors { @@ -38,13 +37,14 @@ class AzureStorageProcessorBase : public core::Processor { AzureStorageProcessorBase(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr& logger) : core::Processor(name, uuid), logger_(logger) { - setSupportedProperties({AzureStorageCredentialsService}); } ~AzureStorageProcessorBase() override = default; protected: std::string getConnectionStringFromControllerService(const std::shared_ptr &context) const; + + std::mutex azure_storage_mutex_; std::shared_ptr logger_; }; diff --git a/extensions/azure/processors/PutAzureBlobStorage.cpp b/extensions/azure/processors/PutAzureBlobStorage.cpp index 31ed1aaf84..32634a4ec5 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.cpp +++ b/extensions/azure/processors/PutAzureBlobStorage.cpp @@ -82,7 +82,8 @@ const core::Relationship PutAzureBlobStorage::Failure("failure", "Unsuccessful o void PutAzureBlobStorage::initialize() { // Set the supported properties - updateSupportedProperties({ + setSupportedProperties({ + AzureStorageCredentialsService, ContainerName, StorageAccountName, StorageAccountKey, @@ -172,7 +173,7 @@ std::string PutAzureBlobStorage::getConnectionString( } void PutAzureBlobStorage::onTrigger(const std::shared_ptr &context, const std::shared_ptr &session) { - 
logger_->log_debug("PutAzureBlobStorage onTrigger"); + logger_->log_trace("PutAzureBlobStorage onTrigger"); std::shared_ptr flow_file = session->get(); if (!flow_file) { return; diff --git a/extensions/azure/processors/PutAzureBlobStorage.h b/extensions/azure/processors/PutAzureBlobStorage.h index d758626be4..9e6332f9f1 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.h +++ b/extensions/azure/processors/PutAzureBlobStorage.h @@ -72,8 +72,8 @@ class PutAzureBlobStorage final : public AzureStorageProcessorBase { int64_t process(const std::shared_ptr& stream) override { std::vector buffer; - int read_ret = stream->read(buffer, flow_size_); - if (read_ret < 0) { + size_t read_ret = stream->read(buffer, flow_size_); + if (io::isError(read_ret)) { return -1; } @@ -111,7 +111,6 @@ class PutAzureBlobStorage final : public AzureStorageProcessorBase { const std::shared_ptr &flow_file) const; void createAzureStorageClient(const std::string &connection_string, const std::string &container_name); - std::mutex azure_storage_mutex_; std::unique_ptr blob_storage_wrapper_; bool create_container_ = false; }; diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 2b05b773fe..fd9b8079a8 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -57,7 +57,8 @@ const core::Relationship PutAzureDataLakeStorage::Failure("failure", "Files that void PutAzureDataLakeStorage::initialize() { // Set the supported properties - updateSupportedProperties({ + setSupportedProperties({ + AzureStorageCredentialsService, FilesystemName, DirectoryName, FileName, @@ -103,7 +104,7 @@ std::optional PutAzureDataLakeStorag } void PutAzureDataLakeStorage::onTrigger(const std::shared_ptr& context, const std::shared_ptr& session) { - logger_->log_debug("PutAzureDataLakeStorage onTrigger"); + logger_->log_trace("PutAzureDataLakeStorage onTrigger"); 
std::shared_ptr flow_file = session->get(); if (!flow_file) { context->yield(); @@ -116,9 +117,14 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptrgetSize(), azure_data_lake_storage_, *params, logger_); - session->read(flow_file, &callback); - auto result = callback.getResult(); + storage::UploadDataLakeStorageResult result; + { + std::lock_guard lock(azure_storage_mutex_); + PutAzureDataLakeStorage::ReadCallback callback(flow_file->getSize(), azure_data_lake_storage_, *params, logger_); + session->read(flow_file, &callback); + result = callback.getResult(); + } + if (result.result_code == storage::UploadResultCode::FILE_ALREADY_EXISTS) { gsl_Expects(conflict_resolution_strategy_ != FileExistsResolutionStrategy::REPLACE_FILE); if (conflict_resolution_strategy_ == FileExistsResolutionStrategy::FAIL_FLOW) { @@ -156,7 +162,7 @@ PutAzureDataLakeStorage::ReadCallback::ReadCallback( int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptr& stream) { std::vector buffer; - int read_ret = stream->read(buffer, flow_size_); + size_t read_ret = stream->read(buffer, flow_size_); if (io::isError(read_ret)) { return -1; } diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index b683c69eef..470e6e9ecf 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -22,10 +22,6 @@ namespace org::apache::nifi::minifi::azure::storage { -AzureDataLakeStorage::AzureDataLakeStorage() - : data_lake_storage_client_(std::make_unique()) { -} - AzureDataLakeStorage::AzureDataLakeStorage(std::unique_ptr data_lake_storage_client) : data_lake_storage_client_(std::move(data_lake_storage_client)) { } diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index 201a072778..813b8245a1 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ 
b/extensions/azure/storage/AzureDataLakeStorage.h @@ -27,7 +27,7 @@ #include "core/logging/Logger.h" #include "core/logging/LoggerConfiguration.h" -#include "AzureDataLakeStorageClient.h" +#include "DataLakeStorageClient.h" namespace org::apache::nifi::minifi::azure::storage { @@ -44,7 +44,6 @@ struct UploadDataLakeStorageResult { class AzureDataLakeStorage { public: - AzureDataLakeStorage(); explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client); storage::UploadDataLakeStorageResult uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); From 240f0aa9d65ae84d84487e21aa67003138ef9f49 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Thu, 16 Sep 2021 17:45:44 +0200 Subject: [PATCH 33/36] Add comments --- extensions/azure/processors/PutAzureBlobStorage.cpp | 1 + extensions/azure/processors/PutAzureDataLakeStorage.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/extensions/azure/processors/PutAzureBlobStorage.cpp b/extensions/azure/processors/PutAzureBlobStorage.cpp index 32634a4ec5..243105b38d 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.cpp +++ b/extensions/azure/processors/PutAzureBlobStorage.cpp @@ -202,6 +202,7 @@ void PutAzureBlobStorage::onTrigger(const std::shared_ptr std::optional upload_result; { + // TODO(lordgamez): This can be removed after maximum allowed threads are implemented. 
See https://issues.apache.org/jira/browse/MINIFICPP-1566 std::lock_guard lock(azure_storage_mutex_); createAzureStorageClient(connection_string, container_name); if (create_container_) { diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index fd9b8079a8..4225396e65 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -119,6 +119,7 @@ void PutAzureDataLakeStorage::onTrigger(const std::shared_ptr lock(azure_storage_mutex_); PutAzureDataLakeStorage::ReadCallback callback(flow_file->getSize(), azure_data_lake_storage_, *params, logger_); session->read(flow_file, &callback); From c569c2d49a1aacfc4bbf84f88da5a7588a203324 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Fri, 17 Sep 2021 10:24:55 +0200 Subject: [PATCH 34/36] Incorporate comment and fix build --- extensions/aws/processors/S3Processor.cpp | 9 +++++---- extensions/azure/processors/AzureStorageProcessorBase.h | 2 +- extensions/azure/processors/PutAzureBlobStorage.h | 2 +- extensions/azure/processors/PutAzureDataLakeStorage.cpp | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/extensions/aws/processors/S3Processor.cpp b/extensions/aws/processors/S3Processor.cpp index 52b47a2e81..5bc0b27d28 100644 --- a/extensions/aws/processors/S3Processor.cpp +++ b/extensions/aws/processors/S3Processor.cpp @@ -115,13 +115,14 @@ const core::Property S3Processor::UseDefaultCredentials( ->build()); S3Processor::S3Processor(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr &logger) - : S3Processor(name, uuid, logger, nullptr) { + : core::Processor(name, uuid), + logger_(logger) { } S3Processor::S3Processor(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr &logger, std::unique_ptr s3_request_sender) - : core::Processor(name, uuid) - , logger_(logger) - , 
s3_wrapper_(std::move(s3_request_sender)) { + : core::Processor(name, uuid), + logger_(logger), + s3_wrapper_(std::move(s3_request_sender)) { } std::optional S3Processor::getAWSCredentialsFromControllerService(const std::shared_ptr &context) const { diff --git a/extensions/azure/processors/AzureStorageProcessorBase.h b/extensions/azure/processors/AzureStorageProcessorBase.h index 870975781b..b2e2655238 100644 --- a/extensions/azure/processors/AzureStorageProcessorBase.h +++ b/extensions/azure/processors/AzureStorageProcessorBase.h @@ -32,7 +32,7 @@ namespace org::apache::nifi::minifi::azure::processors { class AzureStorageProcessorBase : public core::Processor { public: // Supported Properties - static const core::Property AzureStorageCredentialsService; + EXTENSIONAPI static const core::Property AzureStorageCredentialsService; AzureStorageProcessorBase(const std::string& name, const minifi::utils::Identifier& uuid, const std::shared_ptr& logger) : core::Processor(name, uuid), diff --git a/extensions/azure/processors/PutAzureBlobStorage.h b/extensions/azure/processors/PutAzureBlobStorage.h index 9e6332f9f1..f779cff66d 100644 --- a/extensions/azure/processors/PutAzureBlobStorage.h +++ b/extensions/azure/processors/PutAzureBlobStorage.h @@ -73,7 +73,7 @@ class PutAzureBlobStorage final : public AzureStorageProcessorBase { int64_t process(const std::shared_ptr& stream) override { std::vector buffer; size_t read_ret = stream->read(buffer, flow_size_); - if (io::isError(read_ret)) { + if (io::isError(read_ret) || read_ret != flow_size_) { return -1; } diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index 4225396e65..c50e217fa7 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -164,7 +164,7 @@ PutAzureDataLakeStorage::ReadCallback::ReadCallback( int64_t PutAzureDataLakeStorage::ReadCallback::process(const 
std::shared_ptr& stream) { std::vector buffer; size_t read_ret = stream->read(buffer, flow_size_); - if (io::isError(read_ret)) { + if (io::isError(read_ret) || read_ret != flow_size_) { return -1; } From 3f58ce363eee91a7867620943489aadb9c95f190 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Tue, 21 Sep 2021 15:54:14 +0200 Subject: [PATCH 35/36] Fix default instantiation of Azure Data Lake client --- extensions/azure/storage/AzureDataLakeStorage.cpp | 4 +++- extensions/azure/storage/AzureDataLakeStorage.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index 470e6e9ecf..80bf2cdef0 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -20,10 +20,12 @@ #include "AzureDataLakeStorage.h" +#include "AzureDataLakeStorageClient.h" + namespace org::apache::nifi::minifi::azure::storage { AzureDataLakeStorage::AzureDataLakeStorage(std::unique_ptr data_lake_storage_client) - : data_lake_storage_client_(std::move(data_lake_storage_client)) { + : data_lake_storage_client_(data_lake_storage_client ? 
std::move(data_lake_storage_client) : std::make_unique()) { } UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index 813b8245a1..3bbb4e3377 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -44,7 +44,7 @@ struct UploadDataLakeStorageResult { class AzureDataLakeStorage { public: - explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client); + explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client = nullptr); storage::UploadDataLakeStorageResult uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); From faaa33597ae6e8e0649079b3b33aea5cb3ad2d53 Mon Sep 17 00:00:00 2001 From: Gabor Gyimesi Date: Tue, 21 Sep 2021 16:20:11 +0200 Subject: [PATCH 36/36] Replace buffer pointer with gsl::span --- extensions/azure/processors/PutAzureDataLakeStorage.cpp | 2 +- extensions/azure/storage/AzureDataLakeStorage.cpp | 4 ++-- extensions/azure/storage/AzureDataLakeStorage.h | 2 +- extensions/azure/storage/AzureDataLakeStorageClient.cpp | 4 ++-- extensions/azure/storage/AzureDataLakeStorageClient.h | 3 +-- extensions/azure/storage/DataLakeStorageClient.h | 4 +++- libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp | 4 ++-- 7 files changed, 12 insertions(+), 11 deletions(-) diff --git a/extensions/azure/processors/PutAzureDataLakeStorage.cpp b/extensions/azure/processors/PutAzureDataLakeStorage.cpp index c50e217fa7..caff11b358 100644 --- a/extensions/azure/processors/PutAzureDataLakeStorage.cpp +++ b/extensions/azure/processors/PutAzureDataLakeStorage.cpp @@ -168,7 +168,7 @@ int64_t PutAzureDataLakeStorage::ReadCallback::process(const std::shared_ptr{buffer.data(), flow_size_}); return read_ret; } diff 
--git a/extensions/azure/storage/AzureDataLakeStorage.cpp b/extensions/azure/storage/AzureDataLakeStorage.cpp index 80bf2cdef0..82e3d5a67b 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.cpp +++ b/extensions/azure/storage/AzureDataLakeStorage.cpp @@ -28,7 +28,7 @@ AzureDataLakeStorage::AzureDataLakeStorage(std::unique_ptr()) { } -UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { +UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataLakeStorageParameters& params, gsl::span buffer) { UploadDataLakeStorageResult result; try { auto file_created = data_lake_storage_client_->createFile(params); @@ -38,7 +38,7 @@ UploadDataLakeStorageResult AzureDataLakeStorage::uploadFile(const PutAzureDataL return result; } - auto upload_url = data_lake_storage_client_->uploadFile(params, buffer, buffer_size); + auto upload_url = data_lake_storage_client_->uploadFile(params, buffer); if (auto query_string_pos = upload_url.find('?'); query_string_pos != std::string::npos) { upload_url = upload_url.substr(0, query_string_pos); } diff --git a/extensions/azure/storage/AzureDataLakeStorage.h b/extensions/azure/storage/AzureDataLakeStorage.h index 3bbb4e3377..f4b67e7556 100644 --- a/extensions/azure/storage/AzureDataLakeStorage.h +++ b/extensions/azure/storage/AzureDataLakeStorage.h @@ -46,7 +46,7 @@ class AzureDataLakeStorage { public: explicit AzureDataLakeStorage(std::unique_ptr data_lake_storage_client = nullptr); - storage::UploadDataLakeStorageResult uploadFile(const storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size); + storage::UploadDataLakeStorageResult uploadFile(const storage::PutAzureDataLakeStorageParameters& params, gsl::span buffer); private: std::shared_ptr logger_{logging::LoggerFactory::getLogger()}; diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.cpp 
b/extensions/azure/storage/AzureDataLakeStorageClient.cpp index 47be50b541..8c9f2a2933 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.cpp +++ b/extensions/azure/storage/AzureDataLakeStorageClient.cpp @@ -46,9 +46,9 @@ bool AzureDataLakeStorageClient::createFile(const PutAzureDataLakeStorageParamet return response.Value.Created; } -std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) { +std::string AzureDataLakeStorageClient::uploadFile(const PutAzureDataLakeStorageParameters& params, gsl::span buffer) { auto file_client = getFileClient(params); - file_client.UploadFrom(buffer, buffer_size); + file_client.UploadFrom(buffer.data(), buffer.size()); return file_client.GetUrl(); } diff --git a/extensions/azure/storage/AzureDataLakeStorageClient.h b/extensions/azure/storage/AzureDataLakeStorageClient.h index 4e6cd0d619..02b9362117 100644 --- a/extensions/azure/storage/AzureDataLakeStorageClient.h +++ b/extensions/azure/storage/AzureDataLakeStorageClient.h @@ -43,10 +43,9 @@ class AzureDataLakeStorageClient : public DataLakeStorageClient { * Creates a file on the Azure Data Lake Storage * @param params Parameters required for connecting and file access on Azure * @param buffer Buffer containing the data to be uploaded - * @param buffer_size Size of the data to be uploaded * @return URI of the file uploaded */ - std::string uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override; + std::string uploadFile(const PutAzureDataLakeStorageParameters& params, gsl::span buffer) override; private: void resetClientIfNeeded(const std::string& connection_string, const std::string& file_system_name); diff --git a/extensions/azure/storage/DataLakeStorageClient.h b/extensions/azure/storage/DataLakeStorageClient.h index dc1e254abb..f564426695 100644 --- a/extensions/azure/storage/DataLakeStorageClient.h +++ 
b/extensions/azure/storage/DataLakeStorageClient.h @@ -21,6 +21,8 @@ #include +#include "gsl/gsl-lite.hpp" + namespace org::apache::nifi::minifi::azure::storage { struct PutAzureDataLakeStorageParameters { @@ -34,7 +36,7 @@ struct PutAzureDataLakeStorageParameters { class DataLakeStorageClient { public: virtual bool createFile(const PutAzureDataLakeStorageParameters& params) = 0; - virtual std::string uploadFile(const PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) = 0; + virtual std::string uploadFile(const PutAzureDataLakeStorageParameters& params, gsl::span buffer) = 0; virtual ~DataLakeStorageClient() {} }; diff --git a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp index 42bb604be0..2b5689720b 100644 --- a/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp +++ b/libminifi/test/azure-tests/PutAzureDataLakeStorageTests.cpp @@ -49,8 +49,8 @@ class MockDataLakeStorageClient : public minifi::azure::storage::DataLakeStorage return create_file_; } - std::string uploadFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& params, const uint8_t* buffer, std::size_t buffer_size) override { - input_data_ = std::string(buffer, buffer + buffer_size); + std::string uploadFile(const minifi::azure::storage::PutAzureDataLakeStorageParameters& params, gsl::span buffer) override { + input_data_ = std::string(buffer.begin(), buffer.end()); params_ = params; if (upload_fails_) {