From a1feb541099e0957a6f303945c5df5e92cd765c7 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 3 May 2023 14:12:52 -0700 Subject: [PATCH 01/16] implement openAi endpoint in nuget --- cmake/onnxruntime_framework.cmake | 2 +- .../Microsoft.ML.OnnxRuntime.csproj | 1 + .../SessionOptions.shared.cs | 4 +- onnxruntime/core/framework/cloud_invoker.cc | 122 ++++++++++++++++++ .../c-api-noopenmp-packaging-pipelines.yml | 14 +- .../azure-pipelines/win-ci-pipeline.yml | 23 +++- 6 files changed, 160 insertions(+), 6 deletions(-) diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index 5c947a52b7838..b5cd2c3d1b201 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -40,7 +40,7 @@ onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_src if (onnxruntime_USE_AZURE) add_dependencies(onnxruntime_framework triton) - target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include) + target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include ${TRITON_THIRD_PARTY}/curl/include) link_directories(${TRITON_BIN}/lib ${TRITON_BIN}/lib64 ${TRITON_THIRD_PARTY}/curl/lib ${TRITON_THIRD_PARTY}/curl/lib64) if (WIN32) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 78083a8cc1358..ad468b0c6d507 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -41,6 +41,7 @@ net6.0;net6.0-android;net6.0-ios;net6.0-macos diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs index 6bbb159a3de37..30951bae3f9f9 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs @@ -375,10 +375,10 @@ public IntPtr Appender(IntPtr handle, IntPtr[] optKeys, IntPtr[] optValues, UInt /// Optional key/value pairs to specify execution provider options. 
public void AppendExecutionProvider(string providerName, Dictionary providerOptions = null) { - if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN") + if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN" && providerName != "AZURE") { throw new NotSupportedException( - "Only QNN, SNPE and XNNPACK execution providers can be enabled by this method."); + "Only QNN, SNPE, XNNPACK and AZURE execution providers can be enabled by this method."); } if (providerOptions == null) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index d6883d408f2e4..b70c504e6f19a 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -3,6 +3,7 @@ #ifdef USE_AZURE #include "http_client.h" +#include "curl/curl.h" #include "core/common/common.h" #include "core/framework/cloud_invoker.h" #include "core/framework/ort_value.h" @@ -25,6 +26,8 @@ const char* kAzureVerbose = "azure.verbose"; const char* kAzureEndpointType = "azure.endpoint_type"; const char* kAzureAuthKey = "azure.auth_key"; const char* kAzureTriton = "triton"; +const char* kAzureOpenAI = "openai"; +const char* kAzureAudioFile = "azure.audio_file"; CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : config_(config), allocator_(allocator) { @@ -33,6 +36,122 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, } } +// OpenAIInvoker +class OpenAIInvoker : public CloudEndPointInvoker { + public: + OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator); + onnxruntime::Status Send(const CloudEndPointConfig& run_options, + const InlinedVector& input_names, + gsl::span ort_inputs, + const InlinedVector& output_names, + std::vector& ort_outputs) const override; + + private: + std::string uri_; + std::string model_name_; +}; + +OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, + const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) { + ReadConfig(kAzureUri, uri_); + ReadConfig(kAzureModelName, model_name_); +} + +struct MemoryStruct { + char* memory; + size_t size; +}; + +static size_t +WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp) { + size_t realsize = size * nmemb; + struct MemoryStruct* mem = (struct MemoryStruct*)userp; + + char* ptr = (char*)realloc(mem->memory, mem->size + realsize + 1); + ORT_ENFORCE(ptr, "not enough memory (realloc returned NULL)"); + + mem->memory = ptr; + memcpy(&(mem->memory[mem->size]), contents, realsize); + mem->size += realsize; + mem->memory[mem->size] = 0; + + return realsize; +} + +onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, + const InlinedVector& /*input_names*/, + gsl::span ort_inputs, + const InlinedVector& /*output_names*/, + std::vector& ort_outputs) const { + const auto auth_key_iter = run_options.find(kAzureAuthKey); + if (run_options.end() == auth_key_iter || auth_key_iter->second.empty()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "auth key must be specified for openai client"); + } + + CURLcode ret{}; + CURL* curl{}; + curl_mime* mime1{}; + struct curl_slist* headers{}; + + struct MemoryStruct chunk; + chunk.memory = (char*)malloc(1); /* will be grown as needed by the realloc above */ + chunk.size = 0; + + mime1 = NULL; + std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; + headers = 
curl_slist_append(headers, full_auth.c_str()); + headers = curl_slist_append(headers, "Content-Type: multipart/form-data"); + + struct curl_httppost* post = NULL; + struct curl_httppost* last = NULL; + curl_formadd(&post, &last, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_formadd(&post, &last, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + const auto& tensor = ort_inputs[0].Get(); + auto data_size = tensor.SizeInBytes(); + curl_formadd(&post, &last, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); + curl = curl_easy_init(); + curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl, CURLOPT_URL, uri_.c_str()); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&chunk); + + ret = curl_easy_perform(curl); + if (ret != CURLE_OK) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret)); + } + + curl_easy_cleanup(curl); + curl = NULL; + curl_mime_free(mime1); + mime1 = NULL; + curl_slist_free_all(headers); + headers = NULL; + + auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); + if (!output_tensor) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor"); + } + + auto* output_string = output_tensor->MutableData(); + output_string->append(chunk.memory); + free(chunk.memory); + + ort_outputs.resize(1); + auto tensor_type = DataTypeImpl::GetType(); + ort_outputs[0].Init(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); + return Status::OK(); +} + +// AzureTritonInvoker class AzureTritonInvoker : public CloudEndPointInvoker { public: AzureTritonInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator); @@ -287,6 +406,9 @@ Status CloudEndPointInvoker::CreateInvoker(const CloudEndPointConfig& config, if (iter->second == kAzureTriton) { invoker = std::make_unique(config, allocator); return status; + } else if (iter->second == kAzureOpenAI) { + invoker = std::make_unique(config, allocator); + return status; } // else other endpoint types ... 
} status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 672cdd92f6a03..611e061ef4ce6 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -24,6 +24,16 @@ parameters: type: boolean default: false +- name: NugetPackageSuffix + displayName: Suffix to append to nuget package + type: string + default: '' + +- name: AdditionalBuildFlag + displayName: Build flags to append to build command + type: string + default: '' + resources: repositories: - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step @@ -43,9 +53,9 @@ stages: DoCompliance: ${{ parameters.DoCompliance }} DoEsrp: ${{ parameters.DoEsrp }} IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime' + OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime${{ parameters.NugetPackageSuffix }}' AdditionalBuildFlags: '' - AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos' + AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos ${{parameters.AdditionalBuildFlag}}' BuildVariant: 'default' - template: templates/ondevice-training-cpu-packaging-pipeline.yml diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index 2c42ad4693ef4..708ea540d445b 100644 --- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -190,4 +190,25 @@ stages: ORT_EP_NAME: CPU GenerateDocumentation: false WITH_CACHE: true - MachinePool: 'onnxruntime-Win2019-CPU-training-AMD' \ No newline at end of file + MachinePool: 'onnxruntime-Win2019-CPU-training-AMD' + +- stage: x64_release_azure + dependsOn: [] + jobs: + - template: templates/win-ci-vs-2019.yml + parameters: + BuildConfig: 'RelWithDebInfo' + EnvSetupScript: setup_env_azure.bat + buildArch: x64 + additionalBuildFlags: --use_azure + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_release_azure + RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} + RunStaticCodeAnalysis: false + EnablePython: false + isTraining: false + ORT_EP_NAME: CPU + GenerateDocumentation: false + WITH_CACHE: true + MachinePool: 'onnxruntime-Win-CPU-2019' \ No newline at end of file From dfc5461a8a63f49f9c82d300f0ae0faf81b8b51b Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 3 May 2023 22:46:36 -0700 Subject: [PATCH 02/16] make memory allocation RAII --- .../main/java/ai/onnxruntime/OrtProvider.java | 3 +- onnxruntime/core/framework/cloud_invoker.cc | 66 ++++++++++--------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/java/src/main/java/ai/onnxruntime/OrtProvider.java b/java/src/main/java/ai/onnxruntime/OrtProvider.java index cb35bf4f507f5..0da9487c675a2 100644 --- a/java/src/main/java/ai/onnxruntime/OrtProvider.java +++ b/java/src/main/java/ai/onnxruntime/OrtProvider.java @@ -23,7 +23,8 @@ public enum OrtProvider { ARM_NN("ArmNNExecutionProvider"), ROCM("ROCMExecutionProvider"), CORE_ML("CoreMLExecutionProvider"), - XNNPACK("XnnpackExecutionProvider"); + XNNPACK("XnnpackExecutionProvider"), + AZURE("AzureExecutionProvider"); private static final Map valueMap = new HashMap<>(values().length); diff --git a/onnxruntime/core/framework/cloud_invoker.cc 
b/onnxruntime/core/framework/cloud_invoker.cc index b70c504e6f19a..54d1e9cd2217a 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -19,15 +19,14 @@ namespace onnxruntime { namespace tc = triton::client; -const char* kAzureUri = "azure.uri"; -const char* kAzureModelName = "azure.model_name"; -const char* kAzureModelVer = "azure.model_version"; -const char* kAzureVerbose = "azure.verbose"; -const char* kAzureEndpointType = "azure.endpoint_type"; -const char* kAzureAuthKey = "azure.auth_key"; -const char* kAzureTriton = "triton"; -const char* kAzureOpenAI = "openai"; -const char* kAzureAudioFile = "azure.audio_file"; +constexpr const char* kAzureUri = "azure.uri"; +constexpr const char* kAzureModelName = "azure.model_name"; +constexpr const char* kAzureModelVer = "azure.model_version"; +constexpr const char* kAzureVerbose = "azure.verbose"; +constexpr const char* kAzureEndpointType = "azure.endpoint_type"; +constexpr const char* kAzureAuthKey = "azure.auth_key"; +constexpr const char* kAzureTriton = "triton"; +constexpr const char* kAzureOpenAI = "openai"; CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : config_(config), allocator_(allocator) { @@ -57,24 +56,25 @@ OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, ReadConfig(kAzureModelName, model_name_); } -struct MemoryStruct { - char* memory; - size_t size; -}; - -static size_t -WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp) { - size_t realsize = size * nmemb; - struct MemoryStruct* mem = (struct MemoryStruct*)userp; +struct ResponseBuffer { + ResponseBuffer() = default; + ~ResponseBuffer() = default; + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ResponseBuffer); - char* ptr = (char*)realloc(mem->memory, mem->size + realsize + 1); - ORT_ENFORCE(ptr, "not enough memory (realloc returned NULL)"); + using Chunk = std::unique_ptr; + std::list chunks_; - mem->memory = ptr; - memcpy(&(mem->memory[mem->size]), contents, realsize); - mem->size += realsize; - mem->memory[mem->size] = 0; + void Fill(std::string& s) const { + std::for_each(chunks_.begin(), chunks_.end(), [&](const Chunk& chunk) { s.append(chunk.get()); }); + } +}; +static size_t WriteResponseCallback(void* contents, size_t size, size_t nmemb, void* userp) { + size_t realsize = size * nmemb; + auto response = reinterpret_cast(userp); + response->chunks_.push_back(std::make_unique(realsize + 1)); + memcpy(response->chunks_.back().get(), contents, realsize); + response->chunks_.back()[realsize] = '\0'; return realsize; } @@ -88,15 +88,17 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "auth key must be specified for openai client"); } + long verbose = 0; + const auto verbose_iter = run_options.find(kAzureVerbose); + if (run_options.end() != verbose_iter) { + verbose = verbose_iter->second != "0" ? 
1L : 0L; + } CURLcode ret{}; CURL* curl{}; curl_mime* mime1{}; struct curl_slist* headers{}; - - struct MemoryStruct chunk; - chunk.memory = (char*)malloc(1); /* will be grown as needed by the realloc above */ - chunk.size = 0; + ResponseBuffer response_buffer; mime1 = NULL; std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; @@ -120,9 +122,10 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 50L); curl_easy_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, 1L); curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl, CURLOPT_VERBOSE, verbose); curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&chunk); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteResponseCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&response_buffer); ret = curl_easy_perform(curl); if (ret != CURLE_OK) { @@ -142,8 +145,7 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } auto* output_string = output_tensor->MutableData(); - output_string->append(chunk.memory); - free(chunk.memory); + response_buffer.Fill(*output_string); ort_outputs.resize(1); auto tensor_type = DataTypeImpl::GetType(); From 8aaac2f6cbdec8fb8f82dbd5cb2ae4794db25c14 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 08:55:02 -0700 Subject: [PATCH 03/16] add curl to deps --- cmake/deps.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 6b7fb0c95f1d8..0374f9278e6fd 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -47,5 +47,5 @@ boost;https://github.com/boostorg/boost/archive/refs/tags/boost-1.81.0.zip;f6ab0 b64;https://github.com/libb64/libb64/archive/refs/tags/v2.0.0.1.zip;815b6d31d50d9e63df55b25ce555e7b787153c28 pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0.zip;3b9e417e4474c34542b76ad40529e396ac109fb4 triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee -# above are deps introduced by triton client, might remove after 1.14 release extensions;https://github.com/microsoft/onnxruntime-extensions/archive/81e7799c69044c745239202085eb0a98f102937b.zip;d53487035174a046628359289ad27aa0ac0380c9 +curl; From 5c7c67cb3fc469f49e1873f1b1011980b1241ec8 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 09:42:53 -0700 Subject: [PATCH 04/16] wrap up curl handles --- onnxruntime/core/framework/cloud_invoker.cc | 84 +++++++++++++-------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 54d1e9cd2217a..519f2baea5f8b 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -78,6 +78,43 @@ static size_t WriteResponseCallback(void* contents, size_t size, size_t nmemb, v return realsize; } +struct CurlHandler { + CurlHandler() { + curl_ = curl_easy_init(); + curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl_, CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, 
WriteResponseCallback); + } + ~CurlHandler() { + if (curl_) { + curl_easy_cleanup(curl_); + curl_ = {}; + } + if (mime1_) { + curl_mime_free(mime1_); + mime1_ = {}; + } + if (headers_) { + curl_slist_free_all(headers_); + headers_ = {}; + } + if (from_) { + curl_formfree(from_); + from_ = {}; + } + } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); + CURL* curl_{}; + curl_mime* mime1_{}; + struct curl_slist* headers_{}; + struct curl_httppost* from_{}; + struct curl_httppost* last_{}; +}; + onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, const InlinedVector& /*input_names*/, gsl::span ort_inputs, @@ -95,50 +132,31 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } CURLcode ret{}; - CURL* curl{}; - curl_mime* mime1{}; - struct curl_slist* headers{}; + CurlHandler curl_handler; ResponseBuffer response_buffer; - mime1 = NULL; std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; - headers = curl_slist_append(headers, full_auth.c_str()); - headers = curl_slist_append(headers, "Content-Type: multipart/form-data"); + curl_handler.headers_ = curl_slist_append(curl_handler.headers_, full_auth.c_str()); + curl_handler.headers_ = curl_slist_append(curl_handler.headers_, "Content-Type: multipart/form-data"); - struct curl_httppost* post = NULL; - struct curl_httppost* last = NULL; - curl_formadd(&post, &last, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); - curl_formadd(&post, &last, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); const auto& tensor = ort_inputs[0].Get(); auto data_size = tensor.SizeInBytes(); - curl_formadd(&post, &last, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); - curl = curl_easy_init(); - curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 102400L); - curl_easy_setopt(curl, CURLOPT_URL, uri_.c_str()); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "curl/7.83.1"); - curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 50L); - curl_easy_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, 1L); - curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(curl, CURLOPT_VERBOSE, verbose); - curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteResponseCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&response_buffer); - - ret = curl_easy_perform(curl); + + curl_easy_setopt(curl_handler.curl_, CURLOPT_URL, uri_.c_str()); + curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPHEADER, curl_handler.headers_); + curl_easy_setopt(curl_handler.curl_, CURLOPT_VERBOSE, verbose); + curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPPOST, curl_handler.from_); + curl_easy_setopt(curl_handler.curl_, CURLOPT_WRITEDATA, (void*)&response_buffer); + + ret = curl_easy_perform(curl_handler.curl_); if (ret != CURLE_OK) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, 
curl_easy_strerror(ret)); } - curl_easy_cleanup(curl); - curl = NULL; - curl_mime_free(mime1); - mime1 = NULL; - curl_slist_free_all(headers); - headers = NULL; - auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); if (!output_tensor) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor"); From 842f7fd68b0b65b52e633bc69b9bd4b1fdbbe11e Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 09:48:58 -0700 Subject: [PATCH 05/16] set curl dep commit id --- cmake/deps.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 0374f9278e6fd..baba0d463a54b 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -48,4 +48,4 @@ b64;https://github.com/libb64/libb64/archive/refs/tags/v2.0.0.1.zip;815b6d31d50d pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0.zip;3b9e417e4474c34542b76ad40529e396ac109fb4 triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee extensions;https://github.com/microsoft/onnxruntime-extensions/archive/81e7799c69044c745239202085eb0a98f102937b.zip;d53487035174a046628359289ad27aa0ac0380c9 -curl; +curl;https://github.com/curl/curl/archive/refs/tags/curl-8_0_1.zip;b16d1fa8ee567b52c09a0f89940b07d8491b881d From 1fec44ac26e83f2986e2c2a0c35a2c3f5e5befbd Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 15:20:03 -0700 Subject: [PATCH 06/16] address comments --- onnxruntime/core/framework/cloud_invoker.cc | 105 +++++++++++++------- 1 file changed, 69 insertions(+), 36 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 519f2baea5f8b..82a84b5a0bc85 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #ifdef USE_AZURE +#define CURL_STATICLIB #include "http_client.h" #include "curl/curl.h" #include "core/common/common.h" @@ -35,6 +36,25 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, } } +class CurlGlobal { + public: + static void Init() { + static CurlGlobal curl_global; + } + + private: + CurlGlobal() { + // Thread-safety is a must since curl might also be initialized in triton client. 
+ const auto* info = curl_version_info(CURLVERSION_NOW); + ORT_ENFORCE(info->features & CURL_VERSION_THREADSAFE, "curl global init not thread-safe, need to upgrade curl version!"); + ORT_ENFORCE(curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK, "Failed to initialize curl global env!"); + } + ~CurlGlobal() { + curl_global_cleanup(); + } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlGlobal); +}; + // OpenAIInvoker class OpenAIInvoker : public CloudEndPointInvoker { public: @@ -52,34 +72,31 @@ class OpenAIInvoker : public CloudEndPointInvoker { OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) { + CurlGlobal::Init(); ReadConfig(kAzureUri, uri_); ReadConfig(kAzureModelName, model_name_); } -struct ResponseBuffer { - ResponseBuffer() = default; - ~ResponseBuffer() = default; - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ResponseBuffer); - - using Chunk = std::unique_ptr; - std::list chunks_; - - void Fill(std::string& s) const { - std::for_each(chunks_.begin(), chunks_.end(), [&](const Chunk& chunk) { s.append(chunk.get()); }); - } +struct StringBuffer { + StringBuffer() = default; + ~StringBuffer() = default; + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(StringBuffer); + std::stringstream ss_; }; -static size_t WriteResponseCallback(void* contents, size_t size, size_t nmemb, void* userp) { +// applies only when contents is a string +static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t realsize = size * nmemb; - auto response = reinterpret_cast(userp); - response->chunks_.push_back(std::make_unique(realsize + 1)); - memcpy(response->chunks_.back().get(), contents, realsize); - response->chunks_.back()[realsize] = '\0'; + auto buffer = reinterpret_cast(userp); + buffer->ss_ << reinterpret_cast(contents); return realsize; } -struct CurlHandler { - CurlHandler() { +using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*); + +class CurlHandler { + public: + CurlHandler(CurlWriteCallBack call_back) { curl_ = curl_easy_init(); curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L); curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); @@ -87,7 +104,7 @@ struct CurlHandler { curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L); curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L); curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, WriteResponseCallback); + curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, call_back); } ~CurlHandler() { if (curl_) { @@ -107,7 +124,25 @@ struct CurlHandler { from_ = {}; } } + void AddHeader(const char* data) { + headers_ = curl_slist_append(headers_, data); + } + template + void AddForm(Args... 
args) { + curl_formadd(&from_, &last_, args...); + } + template + void SetOption(CURLoption opt, T val) { + curl_easy_setopt(curl_, opt, val); + } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); + CURLcode Perform() { + SetOption(CURLOPT_HTTPHEADER, headers_); + SetOption(CURLOPT_HTTPPOST, from_); + return curl_easy_perform(curl_); + } + + private: CURL* curl_{}; curl_mime* mime1_{}; struct curl_slist* headers_{}; @@ -132,27 +167,25 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } CURLcode ret{}; - CurlHandler curl_handler; - ResponseBuffer response_buffer; + CurlHandler curl_handler(WriteStringCallback); + StringBuffer string_buffer; std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; - curl_handler.headers_ = curl_slist_append(curl_handler.headers_, full_auth.c_str()); - curl_handler.headers_ = curl_slist_append(curl_handler.headers_, "Content-Type: multipart/form-data"); + curl_handler.AddHeader(full_auth.c_str()); + curl_handler.AddHeader("Content-Type: multipart/form-data"); const auto& tensor = ort_inputs[0].Get(); auto data_size = tensor.SizeInBytes(); - curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); - curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); - curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), - CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); - - curl_easy_setopt(curl_handler.curl_, CURLOPT_URL, uri_.c_str()); - curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPHEADER, curl_handler.headers_); - curl_easy_setopt(curl_handler.curl_, CURLOPT_VERBOSE, verbose); - curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPPOST, curl_handler.from_); - curl_easy_setopt(curl_handler.curl_, CURLOPT_WRITEDATA, (void*)&response_buffer); - - ret = curl_easy_perform(curl_handler.curl_); + curl_handler.AddForm(CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_handler.AddForm(CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + curl_handler.AddForm(CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); + + curl_handler.SetOption(CURLOPT_URL, uri_.c_str()); + curl_handler.SetOption(CURLOPT_VERBOSE, verbose); + curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer); + + ret = curl_handler.Perform(); if (ret != CURLE_OK) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret)); } @@ -163,7 +196,7 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } auto* output_string = output_tensor->MutableData(); - response_buffer.Fill(*output_string); + *output_string = string_buffer.ss_.str(); ort_outputs.resize(1); auto tensor_type = DataTypeImpl::GetType(); From a6f18e3640d8bb3cfadf2db58d29ff4939383cdc Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 17:07:40 -0700 Subject: [PATCH 07/16] write sized data to stream --- onnxruntime/core/framework/cloud_invoker.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 82a84b5a0bc85..313ebcb300906 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc 
+++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -84,11 +84,11 @@ struct StringBuffer { std::stringstream ss_; }; -// applies only when contents is a string +// apply the callback only when response is for sure to be a '/0' terminated string static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t realsize = size * nmemb; auto buffer = reinterpret_cast(userp); - buffer->ss_ << reinterpret_cast(contents); + buffer->ss_.write(reinterpret_cast(contents), realsize); return realsize; } @@ -166,7 +166,6 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, verbose = verbose_iter->second != "0" ? 1L : 0L; } - CURLcode ret{}; CurlHandler curl_handler(WriteStringCallback); StringBuffer string_buffer; @@ -185,9 +184,9 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, curl_handler.SetOption(CURLOPT_VERBOSE, verbose); curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer); - ret = curl_handler.Perform(); - if (ret != CURLE_OK) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret)); + auto curl_ret = curl_handler.Perform(); + if (CURLE_OK != curl_ret) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(curl_ret)); } auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); From 9c6bb2e15b5f0a876cdc51c57e8e4647a90a4cab Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 17:30:55 -0700 Subject: [PATCH 08/16] attach curl license --- ThirdPartyNotices.txt | 29 +++++++++++++++++++++++++++ cgmanifests/generated/cgmanifest.json | 10 +++++++++ 2 files changed, 39 insertions(+) diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index b4d981d42dfb8..ff1f826342d8a 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -5993,3 +5993,32 @@ https://github.com/tensorflow/tfjs WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +—— + +curl/curl + +https://github.com/curl + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (C) Daniel Stenberg, , and many +contributors, see the THANKS file. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. 
\ No newline at end of file diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index 989756361bd07..c9524d63907c1 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -447,6 +447,16 @@ }, "comments": "triton" } + }, + { + "component": { + "type": "git", + "git": { + "commitHash": "b16d1fa8ee567b52c09a0f89940b07d8491b881d", + "repositoryUrl": "https://github.com/curl/curl.git" + }, + "comments": "curl" + } } ] } From c0e72cc6acbd7987f25f464a450c3955da040963 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Tue, 9 May 2023 09:12:03 -0700 Subject: [PATCH 09/16] merge main --- .../c-api-noopenmp-packaging-pipelines.yml | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 84de2cc356b5d..d2f1c12bec881 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -24,17 +24,6 @@ parameters: type: boolean default: false -<<<<<<< HEAD -- name: NugetPackageSuffix - displayName: Suffix to append to nuget package - type: string - default: '' - -- name: AdditionalBuildFlag - displayName: Build flags to append to build command - type: string - default: '' -======= - name: PreReleaseVersionSuffixString displayName: Suffix added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the type of pre-release package. type: string @@ -60,7 +49,16 @@ parameters: displayName: Pipeline BuildId, you could find it in the URL type: string default: '0' ->>>>>>> main + +- name: NugetPackageSuffix + displayName: Suffix to append to nuget package + type: string + default: '' + +- name: AdditionalBuildFlag + displayName: Build flags to append to build command + type: string + default: '' resources: repositories: From e558c32589b8e007ac7598c6b8ec0ec71b4724d5 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Tue, 9 May 2023 11:50:38 -0700 Subject: [PATCH 10/16] remove zlib from cmake --- cmake/external/triton.cmake | 12 +++++------- cmake/onnxruntime_framework.cmake | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake index 1bc2d620d8784..c25cb594e8f8d 100644 --- a/cmake/external/triton.cmake +++ b/cmake/external/triton.cmake @@ -44,13 +44,11 @@ if (WIN32) vcpkg_install(re2) vcpkg_install(boost-interprocess) vcpkg_install(boost-stacktrace) - vcpkg_install(zlib) vcpkg_install(pthread) vcpkg_install(b64) add_dependencies(getb64 getpthread) - add_dependencies(getpthread getzlib) - add_dependencies(getzlib getboost-stacktrace) + add_dependencies(getpthread getboost-stacktrace) add_dependencies(getboost-stacktrace getboost-interprocess) add_dependencies(getboost-interprocess getre2) add_dependencies(getre2 getrapidjson) @@ -59,11 +57,11 @@ if (WIN32) ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - #GIT_TAG r22.12 + GIT_TAG main PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build - CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON + CMAKE_ARGS 
-DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF INSTALL_COMMAND "" UPDATE_COMMAND "") @@ -85,11 +83,11 @@ else() ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - #GIT_TAG r22.12 + GIT_TAG main PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF INSTALL_COMMAND "" UPDATE_COMMAND "") diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index b5cd2c3d1b201..fcbae6ad32c50 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -46,13 +46,13 @@ if (onnxruntime_USE_AZURE) if (WIN32) link_directories(${VCPKG_SRC}/installed/${onnxruntime_target_platform}-windows/lib) - target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32 zlib) + target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32) else() find_package(ZLIB REQUIRED) find_package(OpenSSL REQUIRED) - target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl ZLIB::ZLIB OpenSSL::Crypto OpenSSL::SSL) + target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl OpenSSL::Crypto OpenSSL::SSL) endif() #if (WIN32) From 6b88835a8aa2748fd38df4ab2e6072801379c059 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 10:35:03 -0700 Subject: [PATCH 11/16] update 3rd party notice --- ThirdPartyNotices.txt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index b4d981d42dfb8..ff1f826342d8a 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -5993,3 +5993,32 @@ https://github.com/tensorflow/tfjs WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +—— + +curl/curl + +https://github.com/curl + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (C) Daniel Stenberg, , and many +contributors, see the THANKS file. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. 
\ No newline at end of file From 94279c1868cc4314d2305088ed36f6811e1eb0c2 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 12:56:30 -0700 Subject: [PATCH 12/16] stick to triton client r23.05 --- cmake/external/triton.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake index c25cb594e8f8d..9f9da0d8a6baf 100644 --- a/cmake/external/triton.cmake +++ b/cmake/external/triton.cmake @@ -57,7 +57,7 @@ if (WIN32) ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - GIT_TAG main + GIT_TAG r23.05 PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build @@ -83,7 +83,7 @@ else() ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - GIT_TAG main + GIT_TAG r23.05 PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build From ae7d534aef87412079735ffe6eac15340af2a492 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 22:50:52 -0700 Subject: [PATCH 13/16] fix comments --- onnxruntime/core/framework/cloud_invoker.cc | 75 +++++++++------------ 1 file changed, 32 insertions(+), 43 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 313ebcb300906..c14423f6963b0 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -86,46 +86,36 @@ struct StringBuffer { // apply the callback only when response is for sure to be a '/0' terminated string static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { - size_t realsize = size * nmemb; - auto buffer = reinterpret_cast(userp); - buffer->ss_.write(reinterpret_cast(contents), realsize); - return realsize; + try { + size_t realsize = size * nmemb; + auto buffer = reinterpret_cast(userp); + buffer->ss_.write(reinterpret_cast(contents), realsize); + return realsize; + } catch (...) 
{ + // exception caught, abort write + return CURLcode::CURLE_WRITE_ERROR; + } } using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*); class CurlHandler { public: - CurlHandler(CurlWriteCallBack call_back) { - curl_ = curl_easy_init(); - curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L); - curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(curl_, CURLOPT_USERAGENT, "curl/7.83.1"); - curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L); - curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L); - curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, call_back); - } - ~CurlHandler() { - if (curl_) { - curl_easy_cleanup(curl_); - curl_ = {}; - } - if (mime1_) { - curl_mime_free(mime1_); - mime1_ = {}; - } - if (headers_) { - curl_slist_free_all(headers_); - headers_ = {}; - } - if (from_) { - curl_formfree(from_); - from_ = {}; - } + CurlHandler(CurlWriteCallBack call_back) : curl_(curl_easy_init(), curl_easy_cleanup), + headers_(nullptr, curl_slist_free_all), + from_holder_(from_, curl_formfree) { + curl_easy_setopt(curl_.get(), CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl_.get(), CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl_.get(), CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl_.get(), CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_WRITEFUNCTION, call_back); } + ~CurlHandler() = default; + void AddHeader(const char* data) { - headers_ = curl_slist_append(headers_, data); + headers_.reset(curl_slist_append(headers_.get(), data)); } template void AddForm(Args... args) { @@ -133,21 +123,22 @@ class CurlHandler { } template void SetOption(CURLoption opt, T val) { - curl_easy_setopt(curl_, opt, val); + curl_easy_setopt(curl_.get(), opt, val); } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); CURLcode Perform() { - SetOption(CURLOPT_HTTPHEADER, headers_); + SetOption(CURLOPT_HTTPHEADER, headers_.get()); SetOption(CURLOPT_HTTPPOST, from_); - return curl_easy_perform(curl_); + return curl_easy_perform(curl_.get()); } private: - CURL* curl_{}; - curl_mime* mime1_{}; - struct curl_slist* headers_{}; - struct curl_httppost* from_{}; - struct curl_httppost* last_{}; + + std::unique_ptr curl_; + std::unique_ptr headers_; + curl_httppost* from_{}; + curl_httppost* last_{}; + std::unique_ptr from_holder_; }; onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, @@ -196,10 +187,8 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, auto* output_string = output_tensor->MutableData(); *output_string = string_buffer.ss_.str(); - - ort_outputs.resize(1); auto tensor_type = DataTypeImpl::GetType(); - ort_outputs[0].Init(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); + ort_outputs.emplace_back(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); return Status::OK(); } From 7e51a9cb1073eaa628f50090d675b5a5c3d0335c Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 23:44:02 -0700 Subject: [PATCH 14/16] fix header issue --- onnxruntime/core/framework/cloud_invoker.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index c14423f6963b0..c6cde67d21006 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -115,7 
+115,7 @@ class CurlHandler { ~CurlHandler() = default; void AddHeader(const char* data) { - headers_.reset(curl_slist_append(headers_.get(), data)); + headers_.reset(curl_slist_append(headers_.release(), data)); } template void AddForm(Args... args) { From fdad7c78a597d9d075bdd311914400fe6004cb07 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 11 May 2023 08:33:35 -0700 Subject: [PATCH 15/16] format code --- onnxruntime/core/framework/cloud_invoker.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index c6cde67d21006..0f07ac29464e5 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -133,7 +133,6 @@ class CurlHandler { } private: - std::unique_ptr curl_; std::unique_ptr headers_; curl_httppost* from_{}; From 866db7e1bb22abcc8be7ea23abaeaf60a251dbc5 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 11 May 2023 11:49:37 -0700 Subject: [PATCH 16/16] clear fetch before emplace --- onnxruntime/core/framework/cloud_invoker.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 0f07ac29464e5..a2e9ec97cf931 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -187,6 +187,7 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, auto* output_string = output_tensor->MutableData(); *output_string = string_buffer.ss_.str(); auto tensor_type = DataTypeImpl::GetType(); + ort_outputs.clear(); ort_outputs.emplace_back(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); return Status::OK(); }
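Recap of the final state of this series: the OpenAI path ends up as a libcurl-based invoker whose handles (easy handle, header list, legacy form-post list) are released RAII-style, whose response body is collected through a write callback, and whose request is a multipart/form-data POST carrying the model name, response format and raw audio bytes. The sketch below is a self-contained illustration of that pattern only, not the ONNX Runtime implementation: the names TranscriptionRequest and WriteToString, the "audio.wav" filename, and the endpoint URL / "whisper-1" model in the usage comment are illustrative placeholders; the form field names and the legacy curl_formadd API mirror what the patches use.

// Minimal sketch, assuming libcurl with the legacy form API still available (the series
// pins curl 8.0.1), and that curl_global_init(CURL_GLOBAL_DEFAULT) has been called once
// per process, as the CurlGlobal helper added in PATCH 06 does.
#include <curl/curl.h>

#include <stdexcept>
#include <string>
#include <vector>

// Append each chunk of the HTTP response body to the std::string passed via CURLOPT_WRITEDATA.
static size_t WriteToString(void* contents, size_t size, size_t nmemb, void* userp) {
  auto* out = static_cast<std::string*>(userp);
  out->append(static_cast<const char*>(contents), size * nmemb);
  return size * nmemb;  // returning a different value makes libcurl abort with CURLE_WRITE_ERROR
}

// Owns the easy handle, header list and form-post list so every exit path releases them.
class TranscriptionRequest {
 public:
  TranscriptionRequest() : curl_(curl_easy_init()) {
    if (!curl_) throw std::runtime_error("curl_easy_init failed");
  }
  ~TranscriptionRequest() {
    if (form_) curl_formfree(form_);
    if (headers_) curl_slist_free_all(headers_);
    if (curl_) curl_easy_cleanup(curl_);
  }
  TranscriptionRequest(const TranscriptionRequest&) = delete;
  TranscriptionRequest& operator=(const TranscriptionRequest&) = delete;

  // POST `audio` to `uri` as multipart/form-data and return the plain-text transcript.
  std::string Send(const std::string& uri, const std::string& auth_token,
                   const std::string& model, const std::vector<char>& audio) {
    // curl_slist_append copies the string, so passing a temporary is safe.
    headers_ = curl_slist_append(headers_, ("Authorization: Bearer " + auth_token).c_str());
    headers_ = curl_slist_append(headers_, "Content-Type: multipart/form-data");

    // Field names mirror the patch: "model", "response_format" and "file".
    curl_formadd(&form_, &last_, CURLFORM_COPYNAME, "model",
                 CURLFORM_COPYCONTENTS, model.c_str(), CURLFORM_END);
    curl_formadd(&form_, &last_, CURLFORM_COPYNAME, "response_format",
                 CURLFORM_COPYCONTENTS, "text", CURLFORM_END);
    curl_formadd(&form_, &last_, CURLFORM_COPYNAME, "file",
                 CURLFORM_BUFFER, "audio.wav",      // placeholder filename, as in the patch
                 CURLFORM_BUFFERPTR, audio.data(),  // referenced, not copied, by libcurl
                 CURLFORM_BUFFERLENGTH, static_cast<long>(audio.size()),
                 CURLFORM_END);

    std::string response;
    curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str());
    curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, headers_);
    curl_easy_setopt(curl_, CURLOPT_HTTPPOST, form_);
    curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, WriteToString);
    curl_easy_setopt(curl_, CURLOPT_WRITEDATA, &response);

    const CURLcode rc = curl_easy_perform(curl_);
    if (rc != CURLE_OK) throw std::runtime_error(curl_easy_strerror(rc));
    return response;
  }

 private:
  CURL* curl_{};
  curl_slist* headers_{};
  curl_httppost* form_{};
  curl_httppost* last_{};
};

// Usage (illustrative endpoint and model name):
//   TranscriptionRequest req;
//   std::string text = req.Send("https://api.openai.com/v1/audio/transcriptions",
//                               api_key, "whisper-1", wav_bytes);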