From 898e440fef1e50a58b88751eeff4e87d75fd2a23 Mon Sep 17 00:00:00 2001 From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com> Date: Thu, 11 May 2023 22:04:02 -0700 Subject: [PATCH] Implement openAI endpoint invoker for nuget (#15797) Implement openAI audio endpoint, and enable nuget packaging. --------- Co-authored-by: Randy Shuai --- ThirdPartyNotices.txt | 29 +++ cgmanifests/generated/cgmanifest.json | 10 + cmake/deps.txt | 3 +- cmake/external/emsdk | 2 +- cmake/external/triton.cmake | 12 +- cmake/onnxruntime_framework.cmake | 6 +- .../Microsoft.ML.OnnxRuntime.csproj | 1 + .../SessionOptions.shared.cs | 4 +- .../main/java/ai/onnxruntime/OrtProvider.java | 3 +- onnxruntime/core/framework/cloud_invoker.cc | 177 +++++++++++++++++- .../c-api-noopenmp-packaging-pipelines.yml | 14 +- .../azure-pipelines/win-ci-pipeline.yml | 23 ++- 12 files changed, 259 insertions(+), 25 deletions(-) diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index b4d981d42dfb8..e099840e64ce7 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -5993,3 +5993,32 @@ https://github.com/tensorflow/tfjs WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +—— + +curl/curl + +https://github.com/curl + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (C) Daniel Stenberg, , and many +contributors, see the THANKS file. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. \ No newline at end of file diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index bed8acc92cbd7..2054fd4b36262 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -448,5 +448,15 @@ "comments": "extensions" } } + { + "component": { + "type": "git", + "git": { + "commitHash": "b16d1fa8ee567b52c09a0f89940b07d8491b881d", + "repositoryUrl": "https://github.com/curl/curl.git" + }, + "comments": "curl" + } + } ] } diff --git a/cmake/deps.txt b/cmake/deps.txt index 58a7f416319c2..f940f1f65c6ce 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -49,4 +49,5 @@ pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0 triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee # above are deps introduced by triton client, might remove after 1.14 release extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c -eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb \ No newline at end of file +eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb +curl;https://github.com/curl/curl/archive/refs/tags/curl-8_0_1.zip;b16d1fa8ee567b52c09a0f89940b07d8491b881d diff --git a/cmake/external/emsdk b/cmake/external/emsdk index b113f24842c6e..0ab19024f08c6 160000 --- a/cmake/external/emsdk +++ b/cmake/external/emsdk @@ -1 +1 @@ -Subproject commit b113f24842c6e97fe3e352084db09a6e278593ae +Subproject commit 0ab19024f08c6673a713e454ef8bd95e174c807f diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake index b24768bd89afd..9f9da0d8a6baf 100644 --- a/cmake/external/triton.cmake +++ b/cmake/external/triton.cmake @@ -44,13 +44,11 @@ if (WIN32) vcpkg_install(re2) vcpkg_install(boost-interprocess) vcpkg_install(boost-stacktrace) - vcpkg_install(zlib) vcpkg_install(pthread) vcpkg_install(b64) add_dependencies(getb64 getpthread) - add_dependencies(getpthread getzlib) - add_dependencies(getzlib getboost-stacktrace) + add_dependencies(getpthread getboost-stacktrace) add_dependencies(getboost-stacktrace getboost-interprocess) add_dependencies(getboost-interprocess getre2) add_dependencies(getre2 getrapidjson) @@ -59,11 +57,11 @@ if (WIN32) ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - GIT_TAG r22.12 + GIT_TAG r23.05 PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build - CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON + CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF INSTALL_COMMAND "" UPDATE_COMMAND "") @@ -85,11 +83,11 @@ else() ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - GIT_TAG r22.12 + GIT_TAG r23.05 PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF INSTALL_COMMAND "" UPDATE_COMMAND "") diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index 5c947a52b7838..fcbae6ad32c50 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -40,19 +40,19 @@ onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_src if (onnxruntime_USE_AZURE) add_dependencies(onnxruntime_framework triton) - target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include) + target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include ${TRITON_THIRD_PARTY}/curl/include) link_directories(${TRITON_BIN}/lib ${TRITON_BIN}/lib64 ${TRITON_THIRD_PARTY}/curl/lib ${TRITON_THIRD_PARTY}/curl/lib64) if (WIN32) link_directories(${VCPKG_SRC}/installed/${onnxruntime_target_platform}-windows/lib) - target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32 zlib) + target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32) else() find_package(ZLIB REQUIRED) find_package(OpenSSL REQUIRED) - target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl ZLIB::ZLIB OpenSSL::Crypto OpenSSL::SSL) + target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl OpenSSL::Crypto OpenSSL::SSL) endif() #if (WIN32) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 39d90c55b7ba8..9a8cc269bff2d 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -41,6 +41,7 @@ net6.0;net6.0-android;net6.0-ios;net6.0-macos diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs index 6bbb159a3de37..30951bae3f9f9 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs @@ -375,10 +375,10 @@ public IntPtr Appender(IntPtr handle, IntPtr[] optKeys, IntPtr[] optValues, UInt /// Optional key/value pairs to specify execution provider options. public void AppendExecutionProvider(string providerName, Dictionary providerOptions = null) { - if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN") + if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN" && providerName != "AZURE") { throw new NotSupportedException( - "Only QNN, SNPE and XNNPACK execution providers can be enabled by this method."); + "Only QNN, SNPE, XNNPACK and AZURE execution providers can be enabled by this method."); } if (providerOptions == null) diff --git a/java/src/main/java/ai/onnxruntime/OrtProvider.java b/java/src/main/java/ai/onnxruntime/OrtProvider.java index cb35bf4f507f5..0da9487c675a2 100644 --- a/java/src/main/java/ai/onnxruntime/OrtProvider.java +++ b/java/src/main/java/ai/onnxruntime/OrtProvider.java @@ -23,7 +23,8 @@ public enum OrtProvider { ARM_NN("ArmNNExecutionProvider"), ROCM("ROCMExecutionProvider"), CORE_ML("CoreMLExecutionProvider"), - XNNPACK("XnnpackExecutionProvider"); + XNNPACK("XnnpackExecutionProvider"), + AZURE("AzureExecutionProvider"); private static final Map valueMap = new HashMap<>(values().length); diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index d6883d408f2e4..a2e9ec97cf931 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -2,7 +2,9 @@ // Licensed under the MIT License. #ifdef USE_AZURE +#define CURL_STATICLIB #include "http_client.h" +#include "curl/curl.h" #include "core/common/common.h" #include "core/framework/cloud_invoker.h" #include "core/framework/ort_value.h" @@ -18,13 +20,14 @@ namespace onnxruntime { namespace tc = triton::client; -const char* kAzureUri = "azure.uri"; -const char* kAzureModelName = "azure.model_name"; -const char* kAzureModelVer = "azure.model_version"; -const char* kAzureVerbose = "azure.verbose"; -const char* kAzureEndpointType = "azure.endpoint_type"; -const char* kAzureAuthKey = "azure.auth_key"; -const char* kAzureTriton = "triton"; +constexpr const char* kAzureUri = "azure.uri"; +constexpr const char* kAzureModelName = "azure.model_name"; +constexpr const char* kAzureModelVer = "azure.model_version"; +constexpr const char* kAzureVerbose = "azure.verbose"; +constexpr const char* kAzureEndpointType = "azure.endpoint_type"; +constexpr const char* kAzureAuthKey = "azure.auth_key"; +constexpr const char* kAzureTriton = "triton"; +constexpr const char* kAzureOpenAI = "openai"; CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : config_(config), allocator_(allocator) { @@ -33,6 +36,163 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, } } +class CurlGlobal { + public: + static void Init() { + static CurlGlobal curl_global; + } + + private: + CurlGlobal() { + // Thread-safety is a must since curl might also be initialized in triton client. + const auto* info = curl_version_info(CURLVERSION_NOW); + ORT_ENFORCE(info->features & CURL_VERSION_THREADSAFE, "curl global init not thread-safe, need to upgrade curl version!"); + ORT_ENFORCE(curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK, "Failed to initialize curl global env!"); + } + ~CurlGlobal() { + curl_global_cleanup(); + } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlGlobal); +}; + +// OpenAIInvoker +class OpenAIInvoker : public CloudEndPointInvoker { + public: + OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator); + onnxruntime::Status Send(const CloudEndPointConfig& run_options, + const InlinedVector& input_names, + gsl::span ort_inputs, + const InlinedVector& output_names, + std::vector& ort_outputs) const override; + + private: + std::string uri_; + std::string model_name_; +}; + +OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, + const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) { + CurlGlobal::Init(); + ReadConfig(kAzureUri, uri_); + ReadConfig(kAzureModelName, model_name_); +} + +struct StringBuffer { + StringBuffer() = default; + ~StringBuffer() = default; + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(StringBuffer); + std::stringstream ss_; +}; + +// apply the callback only when response is for sure to be a '/0' terminated string +static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { + try { + size_t realsize = size * nmemb; + auto buffer = reinterpret_cast(userp); + buffer->ss_.write(reinterpret_cast(contents), realsize); + return realsize; + } catch (...) { + // exception caught, abort write + return CURLcode::CURLE_WRITE_ERROR; + } +} + +using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*); + +class CurlHandler { + public: + CurlHandler(CurlWriteCallBack call_back) : curl_(curl_easy_init(), curl_easy_cleanup), + headers_(nullptr, curl_slist_free_all), + from_holder_(from_, curl_formfree) { + curl_easy_setopt(curl_.get(), CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl_.get(), CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl_.get(), CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl_.get(), CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_WRITEFUNCTION, call_back); + } + ~CurlHandler() = default; + + void AddHeader(const char* data) { + headers_.reset(curl_slist_append(headers_.release(), data)); + } + template + void AddForm(Args... args) { + curl_formadd(&from_, &last_, args...); + } + template + void SetOption(CURLoption opt, T val) { + curl_easy_setopt(curl_.get(), opt, val); + } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); + CURLcode Perform() { + SetOption(CURLOPT_HTTPHEADER, headers_.get()); + SetOption(CURLOPT_HTTPPOST, from_); + return curl_easy_perform(curl_.get()); + } + + private: + std::unique_ptr curl_; + std::unique_ptr headers_; + curl_httppost* from_{}; + curl_httppost* last_{}; + std::unique_ptr from_holder_; +}; + +onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, + const InlinedVector& /*input_names*/, + gsl::span ort_inputs, + const InlinedVector& /*output_names*/, + std::vector& ort_outputs) const { + const auto auth_key_iter = run_options.find(kAzureAuthKey); + if (run_options.end() == auth_key_iter || auth_key_iter->second.empty()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "auth key must be specified for openai client"); + } + long verbose = 0; + const auto verbose_iter = run_options.find(kAzureVerbose); + if (run_options.end() != verbose_iter) { + verbose = verbose_iter->second != "0" ? 1L : 0L; + } + + CurlHandler curl_handler(WriteStringCallback); + StringBuffer string_buffer; + + std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; + curl_handler.AddHeader(full_auth.c_str()); + curl_handler.AddHeader("Content-Type: multipart/form-data"); + + const auto& tensor = ort_inputs[0].Get(); + auto data_size = tensor.SizeInBytes(); + curl_handler.AddForm(CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_handler.AddForm(CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + curl_handler.AddForm(CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); + + curl_handler.SetOption(CURLOPT_URL, uri_.c_str()); + curl_handler.SetOption(CURLOPT_VERBOSE, verbose); + curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer); + + auto curl_ret = curl_handler.Perform(); + if (CURLE_OK != curl_ret) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(curl_ret)); + } + + auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); + if (!output_tensor) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor"); + } + + auto* output_string = output_tensor->MutableData(); + *output_string = string_buffer.ss_.str(); + auto tensor_type = DataTypeImpl::GetType(); + ort_outputs.clear(); + ort_outputs.emplace_back(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); + return Status::OK(); +} + +// AzureTritonInvoker class AzureTritonInvoker : public CloudEndPointInvoker { public: AzureTritonInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator); @@ -287,6 +447,9 @@ Status CloudEndPointInvoker::CreateInvoker(const CloudEndPointConfig& config, if (iter->second == kAzureTriton) { invoker = std::make_unique(config, allocator); return status; + } else if (iter->second == kAzureOpenAI) { + invoker = std::make_unique(config, allocator); + return status; } // else other endpoint types ... } status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 3d64c2c67cc8e..b4516b4359821 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -50,6 +50,16 @@ parameters: type: string default: '0' +- name: NugetPackageSuffix + displayName: Suffix to append to nuget package + type: string + default: '' + +- name: AdditionalBuildFlag + displayName: Build flags to append to build command + type: string + default: '' + resources: repositories: - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step @@ -100,9 +110,9 @@ stages: DoCompliance: ${{ parameters.DoCompliance }} DoEsrp: ${{ parameters.DoEsrp }} IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime' + OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime${{ parameters.NugetPackageSuffix }}' AdditionalBuildFlags: '' - AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos' + AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos ${{parameters.AdditionalBuildFlag}}' BuildVariant: 'default' SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index 2c42ad4693ef4..708ea540d445b 100644 --- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -190,4 +190,25 @@ stages: ORT_EP_NAME: CPU GenerateDocumentation: false WITH_CACHE: true - MachinePool: 'onnxruntime-Win2019-CPU-training-AMD' \ No newline at end of file + MachinePool: 'onnxruntime-Win2019-CPU-training-AMD' + +- stage: x64_release_azure + dependsOn: [] + jobs: + - template: templates/win-ci-vs-2019.yml + parameters: + BuildConfig: 'RelWithDebInfo' + EnvSetupScript: setup_env_azure.bat + buildArch: x64 + additionalBuildFlags: --use_azure + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_release_azure + RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} + RunStaticCodeAnalysis: false + EnablePython: false + isTraining: false + ORT_EP_NAME: CPU + GenerateDocumentation: false + WITH_CACHE: true + MachinePool: 'onnxruntime-Win-CPU-2019' \ No newline at end of file