From a1feb541099e0957a6f303945c5df5e92cd765c7 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 3 May 2023 14:12:52 -0700 Subject: [PATCH 01/16] implement openAi endpoint in nuget --- cmake/onnxruntime_framework.cmake | 2 +- .../Microsoft.ML.OnnxRuntime.csproj | 1 + .../SessionOptions.shared.cs | 4 +- onnxruntime/core/framework/cloud_invoker.cc | 122 ++++++++++++++++++ .../c-api-noopenmp-packaging-pipelines.yml | 14 +- .../azure-pipelines/win-ci-pipeline.yml | 23 +++- 6 files changed, 160 insertions(+), 6 deletions(-) diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index 5c947a52b7838..b5cd2c3d1b201 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -40,7 +40,7 @@ onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_src if (onnxruntime_USE_AZURE) add_dependencies(onnxruntime_framework triton) - target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include) + target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include ${TRITON_THIRD_PARTY}/curl/include) link_directories(${TRITON_BIN}/lib ${TRITON_BIN}/lib64 ${TRITON_THIRD_PARTY}/curl/lib ${TRITON_THIRD_PARTY}/curl/lib64) if (WIN32) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 78083a8cc1358..ad468b0c6d507 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -41,6 +41,7 @@ net6.0;net6.0-android;net6.0-ios;net6.0-macos diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs index 6bbb159a3de37..30951bae3f9f9 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs @@ -375,10 +375,10 @@ public IntPtr Appender(IntPtr handle, IntPtr[] optKeys, IntPtr[] optValues, UInt /// Optional key/value pairs to specify execution provider options. 
public void AppendExecutionProvider(string providerName, Dictionary providerOptions = null) { - if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN") + if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN" && providerName != "AZURE") { throw new NotSupportedException( - "Only QNN, SNPE and XNNPACK execution providers can be enabled by this method."); + "Only QNN, SNPE, XNNPACK and AZURE execution providers can be enabled by this method."); } if (providerOptions == null) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index d6883d408f2e4..b70c504e6f19a 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -3,6 +3,7 @@ #ifdef USE_AZURE #include "http_client.h" +#include "curl/curl.h" #include "core/common/common.h" #include "core/framework/cloud_invoker.h" #include "core/framework/ort_value.h" @@ -25,6 +26,8 @@ const char* kAzureVerbose = "azure.verbose"; const char* kAzureEndpointType = "azure.endpoint_type"; const char* kAzureAuthKey = "azure.auth_key"; const char* kAzureTriton = "triton"; +const char* kAzureOpenAI = "openai"; +const char* kAzureAudioFile = "azure.audio_file"; CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : config_(config), allocator_(allocator) { @@ -33,6 +36,122 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, } } +// OpenAIInvoker +class OpenAIInvoker : public CloudEndPointInvoker { + public: + OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator); + onnxruntime::Status Send(const CloudEndPointConfig& run_options, + const InlinedVector& input_names, + gsl::span ort_inputs, + const InlinedVector& output_names, + std::vector& ort_outputs) const override; + + private: + std::string uri_; + std::string model_name_; +}; + +OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, + const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) { + ReadConfig(kAzureUri, uri_); + ReadConfig(kAzureModelName, model_name_); +} + +struct MemoryStruct { + char* memory; + size_t size; +}; + +static size_t +WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp) { + size_t realsize = size * nmemb; + struct MemoryStruct* mem = (struct MemoryStruct*)userp; + + char* ptr = (char*)realloc(mem->memory, mem->size + realsize + 1); + ORT_ENFORCE(ptr, "not enough memory (realloc returned NULL)"); + + mem->memory = ptr; + memcpy(&(mem->memory[mem->size]), contents, realsize); + mem->size += realsize; + mem->memory[mem->size] = 0; + + return realsize; +} + +onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, + const InlinedVector& /*input_names*/, + gsl::span ort_inputs, + const InlinedVector& /*output_names*/, + std::vector& ort_outputs) const { + const auto auth_key_iter = run_options.find(kAzureAuthKey); + if (run_options.end() == auth_key_iter || auth_key_iter->second.empty()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "auth key must be specified for openai client"); + } + + CURLcode ret{}; + CURL* curl{}; + curl_mime* mime1{}; + struct curl_slist* headers{}; + + struct MemoryStruct chunk; + chunk.memory = (char*)malloc(1); /* will be grown as needed by the realloc above */ + chunk.size = 0; + + mime1 = NULL; + std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; + headers = 
curl_slist_append(headers, full_auth.c_str()); + headers = curl_slist_append(headers, "Content-Type: multipart/form-data"); + + struct curl_httppost* post = NULL; + struct curl_httppost* last = NULL; + curl_formadd(&post, &last, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_formadd(&post, &last, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + const auto& tensor = ort_inputs[0].Get(); + auto data_size = tensor.SizeInBytes(); + curl_formadd(&post, &last, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); + curl = curl_easy_init(); + curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl, CURLOPT_URL, uri_.c_str()); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&chunk); + + ret = curl_easy_perform(curl); + if (ret != CURLE_OK) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret)); + } + + curl_easy_cleanup(curl); + curl = NULL; + curl_mime_free(mime1); + mime1 = NULL; + curl_slist_free_all(headers); + headers = NULL; + + auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); + if (!output_tensor) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor"); + } + + auto* output_string = output_tensor->MutableData(); + output_string->append(chunk.memory); + free(chunk.memory); + + ort_outputs.resize(1); + auto tensor_type = DataTypeImpl::GetType(); + ort_outputs[0].Init(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); + return Status::OK(); +} + +// AzureTritonInvoker class AzureTritonInvoker : public CloudEndPointInvoker { public: AzureTritonInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator); @@ -287,6 +406,9 @@ Status CloudEndPointInvoker::CreateInvoker(const CloudEndPointConfig& config, if (iter->second == kAzureTriton) { invoker = std::make_unique(config, allocator); return status; + } else if (iter->second == kAzureOpenAI) { + invoker = std::make_unique(config, allocator); + return status; } // else other endpoint types ... 
} status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 672cdd92f6a03..611e061ef4ce6 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -24,6 +24,16 @@ parameters: type: boolean default: false +- name: NugetPackageSuffix + displayName: Suffix to append to nuget package + type: string + default: '' + +- name: AdditionalBuildFlag + displayName: Build flags to append to build command + type: string + default: '' + resources: repositories: - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step @@ -43,9 +53,9 @@ stages: DoCompliance: ${{ parameters.DoCompliance }} DoEsrp: ${{ parameters.DoEsrp }} IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime' + OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime${{ parameters.NugetPackageSuffix }}' AdditionalBuildFlags: '' - AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos' + AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos ${{parameters.AdditionalBuildFlag}}' BuildVariant: 'default' - template: templates/ondevice-training-cpu-packaging-pipeline.yml diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml index 2c42ad4693ef4..708ea540d445b 100644 --- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml @@ -190,4 +190,25 @@ stages: ORT_EP_NAME: CPU GenerateDocumentation: false WITH_CACHE: true - MachinePool: 'onnxruntime-Win2019-CPU-training-AMD' \ No newline at end of file + MachinePool: 'onnxruntime-Win2019-CPU-training-AMD' + +- stage: x64_release_azure + dependsOn: [] + jobs: + - template: templates/win-ci-vs-2019.yml + parameters: + BuildConfig: 'RelWithDebInfo' + EnvSetupScript: setup_env_azure.bat + buildArch: x64 + additionalBuildFlags: --use_azure + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_release_azure + RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} + RunStaticCodeAnalysis: false + EnablePython: false + isTraining: false + ORT_EP_NAME: CPU + GenerateDocumentation: false + WITH_CACHE: true + MachinePool: 'onnxruntime-Win-CPU-2019' \ No newline at end of file From dfc5461a8a63f49f9c82d300f0ae0faf81b8b51b Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 3 May 2023 22:46:36 -0700 Subject: [PATCH 02/16] make memory allocation RAII --- .../main/java/ai/onnxruntime/OrtProvider.java | 3 +- onnxruntime/core/framework/cloud_invoker.cc | 66 ++++++++++--------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/java/src/main/java/ai/onnxruntime/OrtProvider.java b/java/src/main/java/ai/onnxruntime/OrtProvider.java index cb35bf4f507f5..0da9487c675a2 100644 --- a/java/src/main/java/ai/onnxruntime/OrtProvider.java +++ b/java/src/main/java/ai/onnxruntime/OrtProvider.java @@ -23,7 +23,8 @@ public enum OrtProvider { ARM_NN("ArmNNExecutionProvider"), ROCM("ROCMExecutionProvider"), CORE_ML("CoreMLExecutionProvider"), - XNNPACK("XnnpackExecutionProvider"); + XNNPACK("XnnpackExecutionProvider"), + AZURE("AzureExecutionProvider"); private static final Map valueMap = new HashMap<>(values().length); diff --git a/onnxruntime/core/framework/cloud_invoker.cc 
b/onnxruntime/core/framework/cloud_invoker.cc index b70c504e6f19a..54d1e9cd2217a 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -19,15 +19,14 @@ namespace onnxruntime { namespace tc = triton::client; -const char* kAzureUri = "azure.uri"; -const char* kAzureModelName = "azure.model_name"; -const char* kAzureModelVer = "azure.model_version"; -const char* kAzureVerbose = "azure.verbose"; -const char* kAzureEndpointType = "azure.endpoint_type"; -const char* kAzureAuthKey = "azure.auth_key"; -const char* kAzureTriton = "triton"; -const char* kAzureOpenAI = "openai"; -const char* kAzureAudioFile = "azure.audio_file"; +constexpr const char* kAzureUri = "azure.uri"; +constexpr const char* kAzureModelName = "azure.model_name"; +constexpr const char* kAzureModelVer = "azure.model_version"; +constexpr const char* kAzureVerbose = "azure.verbose"; +constexpr const char* kAzureEndpointType = "azure.endpoint_type"; +constexpr const char* kAzureAuthKey = "azure.auth_key"; +constexpr const char* kAzureTriton = "triton"; +constexpr const char* kAzureOpenAI = "openai"; CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : config_(config), allocator_(allocator) { @@ -57,24 +56,25 @@ OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, ReadConfig(kAzureModelName, model_name_); } -struct MemoryStruct { - char* memory; - size_t size; -}; - -static size_t -WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp) { - size_t realsize = size * nmemb; - struct MemoryStruct* mem = (struct MemoryStruct*)userp; +struct ResponseBuffer { + ResponseBuffer() = default; + ~ResponseBuffer() = default; + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ResponseBuffer); - char* ptr = (char*)realloc(mem->memory, mem->size + realsize + 1); - ORT_ENFORCE(ptr, "not enough memory (realloc returned NULL)"); + using Chunk = std::unique_ptr; + std::list chunks_; - mem->memory = ptr; - memcpy(&(mem->memory[mem->size]), contents, realsize); - mem->size += realsize; - mem->memory[mem->size] = 0; + void Fill(std::string& s) const { + std::for_each(chunks_.begin(), chunks_.end(), [&](const Chunk& chunk) { s.append(chunk.get()); }); + } +}; +static size_t WriteResponseCallback(void* contents, size_t size, size_t nmemb, void* userp) { + size_t realsize = size * nmemb; + auto response = reinterpret_cast(userp); + response->chunks_.push_back(std::make_unique(realsize + 1)); + memcpy(response->chunks_.back().get(), contents, realsize); + response->chunks_.back()[realsize] = '\0'; return realsize; } @@ -88,15 +88,17 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "auth key must be specified for openai client"); } + long verbose = 0; + const auto verbose_iter = run_options.find(kAzureVerbose); + if (run_options.end() != verbose_iter) { + verbose = verbose_iter->second != "0" ? 
1L : 0L; + } CURLcode ret{}; CURL* curl{}; curl_mime* mime1{}; struct curl_slist* headers{}; - - struct MemoryStruct chunk; - chunk.memory = (char*)malloc(1); /* will be grown as needed by the realloc above */ - chunk.size = 0; + ResponseBuffer response_buffer; mime1 = NULL; std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; @@ -120,9 +122,10 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 50L); curl_easy_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, 1L); curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl, CURLOPT_VERBOSE, verbose); curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&chunk); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteResponseCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&response_buffer); ret = curl_easy_perform(curl); if (ret != CURLE_OK) { @@ -142,8 +145,7 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } auto* output_string = output_tensor->MutableData(); - output_string->append(chunk.memory); - free(chunk.memory); + response_buffer.Fill(*output_string); ort_outputs.resize(1); auto tensor_type = DataTypeImpl::GetType(); From 8aaac2f6cbdec8fb8f82dbd5cb2ae4794db25c14 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 08:55:02 -0700 Subject: [PATCH 03/16] add curl to deps --- cmake/deps.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 6b7fb0c95f1d8..0374f9278e6fd 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -47,5 +47,5 @@ boost;https://github.com/boostorg/boost/archive/refs/tags/boost-1.81.0.zip;f6ab0 b64;https://github.com/libb64/libb64/archive/refs/tags/v2.0.0.1.zip;815b6d31d50d9e63df55b25ce555e7b787153c28 pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0.zip;3b9e417e4474c34542b76ad40529e396ac109fb4 triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee -# above are deps introduced by triton client, might remove after 1.14 release extensions;https://github.com/microsoft/onnxruntime-extensions/archive/81e7799c69044c745239202085eb0a98f102937b.zip;d53487035174a046628359289ad27aa0ac0380c9 +curl; From 5c7c67cb3fc469f49e1873f1b1011980b1241ec8 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 09:42:53 -0700 Subject: [PATCH 04/16] wrap up curl handles --- onnxruntime/core/framework/cloud_invoker.cc | 84 +++++++++++++-------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 54d1e9cd2217a..519f2baea5f8b 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -78,6 +78,43 @@ static size_t WriteResponseCallback(void* contents, size_t size, size_t nmemb, v return realsize; } +struct CurlHandler { + CurlHandler() { + curl_ = curl_easy_init(); + curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl_, CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, 
WriteResponseCallback); + } + ~CurlHandler() { + if (curl_) { + curl_easy_cleanup(curl_); + curl_ = {}; + } + if (mime1_) { + curl_mime_free(mime1_); + mime1_ = {}; + } + if (headers_) { + curl_slist_free_all(headers_); + headers_ = {}; + } + if (from_) { + curl_formfree(from_); + from_ = {}; + } + } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); + CURL* curl_{}; + curl_mime* mime1_{}; + struct curl_slist* headers_{}; + struct curl_httppost* from_{}; + struct curl_httppost* last_{}; +}; + onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, const InlinedVector& /*input_names*/, gsl::span ort_inputs, @@ -95,50 +132,31 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } CURLcode ret{}; - CURL* curl{}; - curl_mime* mime1{}; - struct curl_slist* headers{}; + CurlHandler curl_handler; ResponseBuffer response_buffer; - mime1 = NULL; std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; - headers = curl_slist_append(headers, full_auth.c_str()); - headers = curl_slist_append(headers, "Content-Type: multipart/form-data"); + curl_handler.headers_ = curl_slist_append(curl_handler.headers_, full_auth.c_str()); + curl_handler.headers_ = curl_slist_append(curl_handler.headers_, "Content-Type: multipart/form-data"); - struct curl_httppost* post = NULL; - struct curl_httppost* last = NULL; - curl_formadd(&post, &last, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); - curl_formadd(&post, &last, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); const auto& tensor = ort_inputs[0].Get(); auto data_size = tensor.SizeInBytes(); - curl_formadd(&post, &last, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); - curl = curl_easy_init(); - curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 102400L); - curl_easy_setopt(curl, CURLOPT_URL, uri_.c_str()); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "curl/7.83.1"); - curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 50L); - curl_easy_setopt(curl, CURLOPT_FTP_SKIP_PASV_IP, 1L); - curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(curl, CURLOPT_VERBOSE, verbose); - curl_easy_setopt(curl, CURLOPT_HTTPPOST, post); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteResponseCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*)&response_buffer); - - ret = curl_easy_perform(curl); + + curl_easy_setopt(curl_handler.curl_, CURLOPT_URL, uri_.c_str()); + curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPHEADER, curl_handler.headers_); + curl_easy_setopt(curl_handler.curl_, CURLOPT_VERBOSE, verbose); + curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPPOST, curl_handler.from_); + curl_easy_setopt(curl_handler.curl_, CURLOPT_WRITEDATA, (void*)&response_buffer); + + ret = curl_easy_perform(curl_handler.curl_); if (ret != CURLE_OK) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, 
curl_easy_strerror(ret)); } - curl_easy_cleanup(curl); - curl = NULL; - curl_mime_free(mime1); - mime1 = NULL; - curl_slist_free_all(headers); - headers = NULL; - auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); if (!output_tensor) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor"); From 842f7fd68b0b65b52e633bc69b9bd4b1fdbbe11e Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 09:48:58 -0700 Subject: [PATCH 05/16] set curl dep commit id --- cmake/deps.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 0374f9278e6fd..baba0d463a54b 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -48,4 +48,4 @@ b64;https://github.com/libb64/libb64/archive/refs/tags/v2.0.0.1.zip;815b6d31d50d pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0.zip;3b9e417e4474c34542b76ad40529e396ac109fb4 triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee extensions;https://github.com/microsoft/onnxruntime-extensions/archive/81e7799c69044c745239202085eb0a98f102937b.zip;d53487035174a046628359289ad27aa0ac0380c9 -curl; +curl;https://github.com/curl/curl/archive/refs/tags/curl-8_0_1.zip;b16d1fa8ee567b52c09a0f89940b07d8491b881d From 1fec44ac26e83f2986e2c2a0c35a2c3f5e5befbd Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 15:20:03 -0700 Subject: [PATCH 06/16] address comments --- onnxruntime/core/framework/cloud_invoker.cc | 105 +++++++++++++------- 1 file changed, 69 insertions(+), 36 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 519f2baea5f8b..82a84b5a0bc85 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #ifdef USE_AZURE +#define CURL_STATICLIB #include "http_client.h" #include "curl/curl.h" #include "core/common/common.h" @@ -35,6 +36,25 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config, } } +class CurlGlobal { + public: + static void Init() { + static CurlGlobal curl_global; + } + + private: + CurlGlobal() { + // Thread-safety is a must since curl might also be initialized in triton client. 
+ const auto* info = curl_version_info(CURLVERSION_NOW); + ORT_ENFORCE(info->features & CURL_VERSION_THREADSAFE, "curl global init not thread-safe, need to upgrade curl version!"); + ORT_ENFORCE(curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK, "Failed to initialize curl global env!"); + } + ~CurlGlobal() { + curl_global_cleanup(); + } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlGlobal); +}; + // OpenAIInvoker class OpenAIInvoker : public CloudEndPointInvoker { public: @@ -52,34 +72,31 @@ class OpenAIInvoker : public CloudEndPointInvoker { OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) { + CurlGlobal::Init(); ReadConfig(kAzureUri, uri_); ReadConfig(kAzureModelName, model_name_); } -struct ResponseBuffer { - ResponseBuffer() = default; - ~ResponseBuffer() = default; - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ResponseBuffer); - - using Chunk = std::unique_ptr; - std::list chunks_; - - void Fill(std::string& s) const { - std::for_each(chunks_.begin(), chunks_.end(), [&](const Chunk& chunk) { s.append(chunk.get()); }); - } +struct StringBuffer { + StringBuffer() = default; + ~StringBuffer() = default; + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(StringBuffer); + std::stringstream ss_; }; -static size_t WriteResponseCallback(void* contents, size_t size, size_t nmemb, void* userp) { +// applies only when contents is a string +static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t realsize = size * nmemb; - auto response = reinterpret_cast(userp); - response->chunks_.push_back(std::make_unique(realsize + 1)); - memcpy(response->chunks_.back().get(), contents, realsize); - response->chunks_.back()[realsize] = '\0'; + auto buffer = reinterpret_cast(userp); + buffer->ss_ << reinterpret_cast(contents); return realsize; } -struct CurlHandler { - CurlHandler() { +using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*); + +class CurlHandler { + public: + CurlHandler(CurlWriteCallBack call_back) { curl_ = curl_easy_init(); curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L); curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); @@ -87,7 +104,7 @@ struct CurlHandler { curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L); curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L); curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, WriteResponseCallback); + curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, call_back); } ~CurlHandler() { if (curl_) { @@ -107,7 +124,25 @@ struct CurlHandler { from_ = {}; } } + void AddHeader(const char* data) { + headers_ = curl_slist_append(headers_, data); + } + template + void AddForm(Args... 
args) { + curl_formadd(&from_, &last_, args...); + } + template + void SetOption(CURLoption opt, T val) { + curl_easy_setopt(curl_, opt, val); + } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); + CURLcode Perform() { + SetOption(CURLOPT_HTTPHEADER, headers_); + SetOption(CURLOPT_HTTPPOST, from_); + return curl_easy_perform(curl_); + } + + private: CURL* curl_{}; curl_mime* mime1_{}; struct curl_slist* headers_{}; @@ -132,27 +167,25 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } CURLcode ret{}; - CurlHandler curl_handler; - ResponseBuffer response_buffer; + CurlHandler curl_handler(WriteStringCallback); + StringBuffer string_buffer; std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second; - curl_handler.headers_ = curl_slist_append(curl_handler.headers_, full_auth.c_str()); - curl_handler.headers_ = curl_slist_append(curl_handler.headers_, "Content-Type: multipart/form-data"); + curl_handler.AddHeader(full_auth.c_str()); + curl_handler.AddHeader("Content-Type: multipart/form-data"); const auto& tensor = ort_inputs[0].Get(); auto data_size = tensor.SizeInBytes(); - curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); - curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); - curl_formadd(&curl_handler.from_, &curl_handler.last_, CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), - CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); - - curl_easy_setopt(curl_handler.curl_, CURLOPT_URL, uri_.c_str()); - curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPHEADER, curl_handler.headers_); - curl_easy_setopt(curl_handler.curl_, CURLOPT_VERBOSE, verbose); - curl_easy_setopt(curl_handler.curl_, CURLOPT_HTTPPOST, curl_handler.from_); - curl_easy_setopt(curl_handler.curl_, CURLOPT_WRITEDATA, (void*)&response_buffer); - - ret = curl_easy_perform(curl_handler.curl_); + curl_handler.AddForm(CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END); + curl_handler.AddForm(CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END); + curl_handler.AddForm(CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(), + CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END); + + curl_handler.SetOption(CURLOPT_URL, uri_.c_str()); + curl_handler.SetOption(CURLOPT_VERBOSE, verbose); + curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer); + + ret = curl_handler.Perform(); if (ret != CURLE_OK) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret)); } @@ -163,7 +196,7 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, } auto* output_string = output_tensor->MutableData(); - response_buffer.Fill(*output_string); + *output_string = string_buffer.ss_.str(); ort_outputs.resize(1); auto tensor_type = DataTypeImpl::GetType(); From a6f18e3640d8bb3cfadf2db58d29ff4939383cdc Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 17:07:40 -0700 Subject: [PATCH 07/16] write sized data to stream --- onnxruntime/core/framework/cloud_invoker.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 82a84b5a0bc85..313ebcb300906 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc 
+++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -84,11 +84,11 @@ struct StringBuffer { std::stringstream ss_; }; -// applies only when contents is a string +// apply the callback only when response is for sure to be a '/0' terminated string static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t realsize = size * nmemb; auto buffer = reinterpret_cast(userp); - buffer->ss_ << reinterpret_cast(contents); + buffer->ss_.write(reinterpret_cast(contents), realsize); return realsize; } @@ -166,7 +166,6 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, verbose = verbose_iter->second != "0" ? 1L : 0L; } - CURLcode ret{}; CurlHandler curl_handler(WriteStringCallback); StringBuffer string_buffer; @@ -185,9 +184,9 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, curl_handler.SetOption(CURLOPT_VERBOSE, verbose); curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer); - ret = curl_handler.Perform(); - if (ret != CURLE_OK) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret)); + auto curl_ret = curl_handler.Perform(); + if (CURLE_OK != curl_ret) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(curl_ret)); } auto output_tensor = std::make_unique(onnxruntime::DataTypeImpl::GetType(), TensorShape{1}, allocator_); From 9c6bb2e15b5f0a876cdc51c57e8e4647a90a4cab Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 4 May 2023 17:30:55 -0700 Subject: [PATCH 08/16] attach curl license --- ThirdPartyNotices.txt | 29 +++++++++++++++++++++++++++ cgmanifests/generated/cgmanifest.json | 10 +++++++++ 2 files changed, 39 insertions(+) diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index b4d981d42dfb8..ff1f826342d8a 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -5993,3 +5993,32 @@ https://github.com/tensorflow/tfjs WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +—— + +curl/curl + +https://github.com/curl + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (C) Daniel Stenberg, , and many +contributors, see the THANKS file. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. 
\ No newline at end of file diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json index 989756361bd07..c9524d63907c1 100644 --- a/cgmanifests/generated/cgmanifest.json +++ b/cgmanifests/generated/cgmanifest.json @@ -447,6 +447,16 @@ }, "comments": "triton" } + }, + { + "component": { + "type": "git", + "git": { + "commitHash": "b16d1fa8ee567b52c09a0f89940b07d8491b881d", + "repositoryUrl": "https://github.com/curl/curl.git" + }, + "comments": "curl" + } } ] } From c0e72cc6acbd7987f25f464a450c3955da040963 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Tue, 9 May 2023 09:12:03 -0700 Subject: [PATCH 09/16] merge main --- .../c-api-noopenmp-packaging-pipelines.yml | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 84de2cc356b5d..d2f1c12bec881 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -24,17 +24,6 @@ parameters: type: boolean default: false -<<<<<<< HEAD -- name: NugetPackageSuffix - displayName: Suffix to append to nuget package - type: string - default: '' - -- name: AdditionalBuildFlag - displayName: Build flags to append to build command - type: string - default: '' -======= - name: PreReleaseVersionSuffixString displayName: Suffix added to pre-release package version. Only used if IsReleaseBuild is true. Denotes the type of pre-release package. type: string @@ -60,7 +49,16 @@ parameters: displayName: Pipeline BuildId, you could find it in the URL type: string default: '0' ->>>>>>> main + +- name: NugetPackageSuffix + displayName: Suffix to append to nuget package + type: string + default: '' + +- name: AdditionalBuildFlag + displayName: Build flags to append to build command + type: string + default: '' resources: repositories: From e558c32589b8e007ac7598c6b8ec0ec71b4724d5 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Tue, 9 May 2023 11:50:38 -0700 Subject: [PATCH 10/16] remove zlib from cmake --- cmake/external/triton.cmake | 12 +++++------- cmake/onnxruntime_framework.cmake | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake index 1bc2d620d8784..c25cb594e8f8d 100644 --- a/cmake/external/triton.cmake +++ b/cmake/external/triton.cmake @@ -44,13 +44,11 @@ if (WIN32) vcpkg_install(re2) vcpkg_install(boost-interprocess) vcpkg_install(boost-stacktrace) - vcpkg_install(zlib) vcpkg_install(pthread) vcpkg_install(b64) add_dependencies(getb64 getpthread) - add_dependencies(getpthread getzlib) - add_dependencies(getzlib getboost-stacktrace) + add_dependencies(getpthread getboost-stacktrace) add_dependencies(getboost-stacktrace getboost-interprocess) add_dependencies(getboost-interprocess getre2) add_dependencies(getre2 getrapidjson) @@ -59,11 +57,11 @@ if (WIN32) ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - #GIT_TAG r22.12 + GIT_TAG main PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build - CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON + CMAKE_ARGS 
-DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF INSTALL_COMMAND "" UPDATE_COMMAND "") @@ -85,11 +83,11 @@ else() ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - #GIT_TAG r22.12 + GIT_TAG main PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF INSTALL_COMMAND "" UPDATE_COMMAND "") diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake index b5cd2c3d1b201..fcbae6ad32c50 100644 --- a/cmake/onnxruntime_framework.cmake +++ b/cmake/onnxruntime_framework.cmake @@ -46,13 +46,13 @@ if (onnxruntime_USE_AZURE) if (WIN32) link_directories(${VCPKG_SRC}/installed/${onnxruntime_target_platform}-windows/lib) - target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32 zlib) + target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32) else() find_package(ZLIB REQUIRED) find_package(OpenSSL REQUIRED) - target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl ZLIB::ZLIB OpenSSL::Crypto OpenSSL::SSL) + target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl OpenSSL::Crypto OpenSSL::SSL) endif() #if (WIN32) From 6b88835a8aa2748fd38df4ab2e6072801379c059 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 10:35:03 -0700 Subject: [PATCH 11/16] update 3rd party notice --- ThirdPartyNotices.txt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index b4d981d42dfb8..ff1f826342d8a 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -5993,3 +5993,32 @@ https://github.com/tensorflow/tfjs WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +—— + +curl/curl + +https://github.com/curl + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (C) Daniel Stenberg, , and many +contributors, see the THANKS file. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. 
\ No newline at end of file From 94279c1868cc4314d2305088ed36f6811e1eb0c2 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 12:56:30 -0700 Subject: [PATCH 12/16] stick to triton client r23.05 --- cmake/external/triton.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake index c25cb594e8f8d..9f9da0d8a6baf 100644 --- a/cmake/external/triton.cmake +++ b/cmake/external/triton.cmake @@ -57,7 +57,7 @@ if (WIN32) ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - GIT_TAG main + GIT_TAG r23.05 PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build @@ -83,7 +83,7 @@ else() ExternalProject_Add(triton GIT_REPOSITORY https://github.com/triton-inference-server/client.git - GIT_TAG main + GIT_TAG r23.05 PREFIX triton SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build From ae7d534aef87412079735ffe6eac15340af2a492 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 22:50:52 -0700 Subject: [PATCH 13/16] fix comments --- onnxruntime/core/framework/cloud_invoker.cc | 75 +++++++++------------ 1 file changed, 32 insertions(+), 43 deletions(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 313ebcb300906..c14423f6963b0 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -86,46 +86,36 @@ struct StringBuffer { // apply the callback only when response is for sure to be a '/0' terminated string static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) { - size_t realsize = size * nmemb; - auto buffer = reinterpret_cast(userp); - buffer->ss_.write(reinterpret_cast(contents), realsize); - return realsize; + try { + size_t realsize = size * nmemb; + auto buffer = reinterpret_cast(userp); + buffer->ss_.write(reinterpret_cast(contents), realsize); + return realsize; + } catch (...) 
{ + // exception caught, abort write + return CURLcode::CURLE_WRITE_ERROR; + } } using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*); class CurlHandler { public: - CurlHandler(CurlWriteCallBack call_back) { - curl_ = curl_easy_init(); - curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L); - curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(curl_, CURLOPT_USERAGENT, "curl/7.83.1"); - curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L); - curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L); - curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, call_back); - } - ~CurlHandler() { - if (curl_) { - curl_easy_cleanup(curl_); - curl_ = {}; - } - if (mime1_) { - curl_mime_free(mime1_); - mime1_ = {}; - } - if (headers_) { - curl_slist_free_all(headers_); - headers_ = {}; - } - if (from_) { - curl_formfree(from_); - from_ = {}; - } + CurlHandler(CurlWriteCallBack call_back) : curl_(curl_easy_init(), curl_easy_cleanup), + headers_(nullptr, curl_slist_free_all), + from_holder_(from_, curl_formfree) { + curl_easy_setopt(curl_.get(), CURLOPT_BUFFERSIZE, 102400L); + curl_easy_setopt(curl_.get(), CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_USERAGENT, "curl/7.83.1"); + curl_easy_setopt(curl_.get(), CURLOPT_MAXREDIRS, 50L); + curl_easy_setopt(curl_.get(), CURLOPT_FTP_SKIP_PASV_IP, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt(curl_.get(), CURLOPT_WRITEFUNCTION, call_back); } + ~CurlHandler() = default; + void AddHeader(const char* data) { - headers_ = curl_slist_append(headers_, data); + headers_.reset(curl_slist_append(headers_.get(), data)); } template void AddForm(Args... args) { @@ -133,21 +123,22 @@ class CurlHandler { } template void SetOption(CURLoption opt, T val) { - curl_easy_setopt(curl_, opt, val); + curl_easy_setopt(curl_.get(), opt, val); } ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler); CURLcode Perform() { - SetOption(CURLOPT_HTTPHEADER, headers_); + SetOption(CURLOPT_HTTPHEADER, headers_.get()); SetOption(CURLOPT_HTTPPOST, from_); - return curl_easy_perform(curl_); + return curl_easy_perform(curl_.get()); } private: - CURL* curl_{}; - curl_mime* mime1_{}; - struct curl_slist* headers_{}; - struct curl_httppost* from_{}; - struct curl_httppost* last_{}; + + std::unique_ptr curl_; + std::unique_ptr headers_; + curl_httppost* from_{}; + curl_httppost* last_{}; + std::unique_ptr from_holder_; }; onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, @@ -196,10 +187,8 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, auto* output_string = output_tensor->MutableData(); *output_string = string_buffer.ss_.str(); - - ort_outputs.resize(1); auto tensor_type = DataTypeImpl::GetType(); - ort_outputs[0].Init(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); + ort_outputs.emplace_back(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); return Status::OK(); } From 7e51a9cb1073eaa628f50090d675b5a5c3d0335c Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Wed, 10 May 2023 23:44:02 -0700 Subject: [PATCH 14/16] fix header issue --- onnxruntime/core/framework/cloud_invoker.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index c14423f6963b0..c6cde67d21006 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -115,7 
+115,7 @@ class CurlHandler { ~CurlHandler() = default; void AddHeader(const char* data) { - headers_.reset(curl_slist_append(headers_.get(), data)); + headers_.reset(curl_slist_append(headers_.release(), data)); } template void AddForm(Args... args) { From fdad7c78a597d9d075bdd311914400fe6004cb07 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 11 May 2023 08:33:35 -0700 Subject: [PATCH 15/16] format code --- onnxruntime/core/framework/cloud_invoker.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index c6cde67d21006..0f07ac29464e5 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -133,7 +133,6 @@ class CurlHandler { } private: - std::unique_ptr curl_; std::unique_ptr headers_; curl_httppost* from_{}; From 866db7e1bb22abcc8be7ea23abaeaf60a251dbc5 Mon Sep 17 00:00:00 2001 From: Randy Shuai Date: Thu, 11 May 2023 11:49:37 -0700 Subject: [PATCH 16/16] clear fetch before emplace --- onnxruntime/core/framework/cloud_invoker.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc index 0f07ac29464e5..a2e9ec97cf931 100644 --- a/onnxruntime/core/framework/cloud_invoker.cc +++ b/onnxruntime/core/framework/cloud_invoker.cc @@ -187,6 +187,7 @@ onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options, auto* output_string = output_tensor->MutableData(); *output_string = string_buffer.ss_.str(); auto tensor_type = DataTypeImpl::GetType(); + ort_outputs.clear(); ort_outputs.emplace_back(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc()); return Status::OK(); }
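Recap of the final state of this series: the OpenAI path ends up as a libcurl-based invoker whose handles (easy handle, header list, legacy form-post list) are released RAII-style, whose response body is collected through a write callback, and whose request is a multipart/form-data POST carrying the model name, response format and raw audio bytes. The sketch below is a self-contained illustration of that pattern only, not the ONNX Runtime implementation: the names TranscriptionRequest and WriteToString, the "audio.wav" filename, and the endpoint URL / "whisper-1" model in the usage comment are illustrative placeholders; the form field names and the legacy curl_formadd API mirror what the patches use.

// Minimal sketch, assuming libcurl with the legacy form API still available (the series
// pins curl 8.0.1), and that curl_global_init(CURL_GLOBAL_DEFAULT) has been called once
// per process, as the CurlGlobal helper added in PATCH 06 does.
#include <curl/curl.h>

#include <stdexcept>
#include <string>
#include <vector>

// Append each chunk of the HTTP response body to the std::string passed via CURLOPT_WRITEDATA.
static size_t WriteToString(void* contents, size_t size, size_t nmemb, void* userp) {
  auto* out = static_cast<std::string*>(userp);
  out->append(static_cast<const char*>(contents), size * nmemb);
  return size * nmemb;  // returning a different value makes libcurl abort with CURLE_WRITE_ERROR
}

// Owns the easy handle, header list and form-post list so every exit path releases them.
class TranscriptionRequest {
 public:
  TranscriptionRequest() : curl_(curl_easy_init()) {
    if (!curl_) throw std::runtime_error("curl_easy_init failed");
  }
  ~TranscriptionRequest() {
    if (form_) curl_formfree(form_);
    if (headers_) curl_slist_free_all(headers_);
    if (curl_) curl_easy_cleanup(curl_);
  }
  TranscriptionRequest(const TranscriptionRequest&) = delete;
  TranscriptionRequest& operator=(const TranscriptionRequest&) = delete;

  // POST `audio` to `uri` as multipart/form-data and return the plain-text transcript.
  std::string Send(const std::string& uri, const std::string& auth_token,
                   const std::string& model, const std::vector<char>& audio) {
    // curl_slist_append copies the string, so passing a temporary is safe.
    headers_ = curl_slist_append(headers_, ("Authorization: Bearer " + auth_token).c_str());
    headers_ = curl_slist_append(headers_, "Content-Type: multipart/form-data");

    // Field names mirror the patch: "model", "response_format" and "file".
    curl_formadd(&form_, &last_, CURLFORM_COPYNAME, "model",
                 CURLFORM_COPYCONTENTS, model.c_str(), CURLFORM_END);
    curl_formadd(&form_, &last_, CURLFORM_COPYNAME, "response_format",
                 CURLFORM_COPYCONTENTS, "text", CURLFORM_END);
    curl_formadd(&form_, &last_, CURLFORM_COPYNAME, "file",
                 CURLFORM_BUFFER, "audio.wav",      // placeholder filename, as in the patch
                 CURLFORM_BUFFERPTR, audio.data(),  // referenced, not copied, by libcurl
                 CURLFORM_BUFFERLENGTH, static_cast<long>(audio.size()),
                 CURLFORM_END);

    std::string response;
    curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str());
    curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, headers_);
    curl_easy_setopt(curl_, CURLOPT_HTTPPOST, form_);
    curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, WriteToString);
    curl_easy_setopt(curl_, CURLOPT_WRITEDATA, &response);

    const CURLcode rc = curl_easy_perform(curl_);
    if (rc != CURLE_OK) throw std::runtime_error(curl_easy_strerror(rc));
    return response;
  }

 private:
  CURL* curl_{};
  curl_slist* headers_{};
  curl_httppost* form_{};
  curl_httppost* last_{};
};

// Usage (illustrative endpoint and model name):
//   TranscriptionRequest req;
//   std::string text = req.Send("https://api.openai.com/v1/audio/transcriptions",
//                               api_key, "whisper-1", wav_bytes);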