
Implement openAI endpoint invoker for nuget #15797

Merged: 25 commits into main from rashuai/AzureEP_1_15_nuget, May 12, 2023

Changes shown below are from the first 7 of the 25 commits.

Commits
a1feb54
implement openAi endpoint in nuget
RandyShuai May 3, 2023
dfc5461
make memory allocation RAII
RandyShuai May 4, 2023
8aaac2f
add curl to deps
RandyShuai May 4, 2023
6cdae88
Merge branch 'main' into rashuai/AzureEP_1_15_nuget
RandyShuai May 4, 2023
5c7c67c
wrap up curl handles
RandyShuai May 4, 2023
842f7fd
set curl dep commit id
RandyShuai May 4, 2023
1fec44a
address comments
RandyShuai May 4, 2023
a6f18e3
write sized data to stream
RandyShuai May 5, 2023
9c6bb2e
attach curl license
RandyShuai May 5, 2023
7e14fd6
merge main
RandyShuai May 9, 2023
c0e72cc
merge main
RandyShuai May 9, 2023
e558c32
remove zlib from cmake
RandyShuai May 9, 2023
6b88835
update 3rd party notice
RandyShuai May 10, 2023
dc7ae1f
merge main
RandyShuai May 10, 2023
94279c1
stick to triton client r23.05
RandyShuai May 10, 2023
2973579
Merge branch 'main' of https://github.com/microsoft/onnxruntime
RandyShuai May 10, 2023
952eb52
Merge branch 'main' into rashuai/AzureEP_1_15_nuget
RandyShuai May 10, 2023
ae7d534
fix comments
RandyShuai May 11, 2023
7e51a9c
fix header issue
RandyShuai May 11, 2023
fdad7c7
format code
RandyShuai May 11, 2023
c013693
Merge branch 'main' of https://github.com/microsoft/onnxruntime
RandyShuai May 11, 2023
e0dcb22
Merge branch 'main' into rashuai/AzureEP_1_15_nuget
RandyShuai May 11, 2023
866db7e
clear fetch before emplace
RandyShuai May 11, 2023
d099bd6
Merge branch 'main' of https://github.com/microsoft/onnxruntime
RandyShuai May 11, 2023
17ae944
resolve conflict
RandyShuai May 11, 2023
Files changed
2 changes: 1 addition & 1 deletion cmake/deps.txt
@@ -47,5 +47,5 @@ boost;https://github.com/boostorg/boost/archive/refs/tags/boost-1.81.0.zip;f6ab0
b64;https://github.com/libb64/libb64/archive/refs/tags/v2.0.0.1.zip;815b6d31d50d9e63df55b25ce555e7b787153c28
pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0.zip;3b9e417e4474c34542b76ad40529e396ac109fb4
triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee
# above are deps introduced by triton client, might remove after 1.14 release
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/81e7799c69044c745239202085eb0a98f102937b.zip;d53487035174a046628359289ad27aa0ac0380c9
curl;https://github.com/curl/curl/archive/refs/tags/curl-8_0_1.zip;b16d1fa8ee567b52c09a0f89940b07d8491b881d
2 changes: 1 addition & 1 deletion cmake/onnxruntime_framework.cmake
@@ -40,7 +40,7 @@ onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_src
if (onnxruntime_USE_AZURE)

add_dependencies(onnxruntime_framework triton)
target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include)
target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include ${TRITON_THIRD_PARTY}/curl/include)
link_directories(${TRITON_BIN}/lib ${TRITON_BIN}/lib64 ${TRITON_THIRD_PARTY}/curl/lib ${TRITON_THIRD_PARTY}/curl/lib64)

if (WIN32)
@@ -41,6 +41,7 @@
<!-- only set the .net6 targets if we're building an ORT package.
we can add .net6 support to other packages later as needed -->
<PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime' OR
'$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Azure' OR
'$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Gpu')">
<Net6Targets>net6.0;net6.0-android;net6.0-ios;net6.0-macos</Net6Targets>
</PropertyGroup>
4 changes: 2 additions & 2 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
@@ -375,10 +375,10 @@ public IntPtr Appender(IntPtr handle, IntPtr[] optKeys, IntPtr[] optValues, UInt
/// <param name="providerOptions">Optional key/value pairs to specify execution provider options.</param>
public void AppendExecutionProvider(string providerName, Dictionary<string, string> providerOptions = null)
{
if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN")
if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN" && providerName != "AZURE")
{
throw new NotSupportedException(
"Only QNN, SNPE and XNNPACK execution providers can be enabled by this method.");
"Only QNN, SNPE, XNNPACK and AZURE execution providers can be enabled by this method.");
}

if (providerOptions == null)
3 changes: 2 additions & 1 deletion java/src/main/java/ai/onnxruntime/OrtProvider.java
@@ -23,7 +23,8 @@ public enum OrtProvider {
ARM_NN("ArmNNExecutionProvider"),
ROCM("ROCMExecutionProvider"),
CORE_ML("CoreMLExecutionProvider"),
XNNPACK("XnnpackExecutionProvider");
XNNPACK("XnnpackExecutionProvider"),
AZURE("AzureExecutionProvider");

private static final Map<String, OrtProvider> valueMap = new HashMap<>(values().length);

189 changes: 182 additions & 7 deletions onnxruntime/core/framework/cloud_invoker.cc
@@ -2,7 +2,9 @@
// Licensed under the MIT License.

#ifdef USE_AZURE
#define CURL_STATICLIB
#include "http_client.h"
#include "curl/curl.h"
#include "core/common/common.h"
#include "core/framework/cloud_invoker.h"
#include "core/framework/ort_value.h"
@@ -18,13 +20,14 @@ namespace onnxruntime {

namespace tc = triton::client;

const char* kAzureUri = "azure.uri";
const char* kAzureModelName = "azure.model_name";
const char* kAzureModelVer = "azure.model_version";
const char* kAzureVerbose = "azure.verbose";
const char* kAzureEndpointType = "azure.endpoint_type";
const char* kAzureAuthKey = "azure.auth_key";
const char* kAzureTriton = "triton";
constexpr const char* kAzureUri = "azure.uri";
constexpr const char* kAzureModelName = "azure.model_name";
constexpr const char* kAzureModelVer = "azure.model_version";
constexpr const char* kAzureVerbose = "azure.verbose";
constexpr const char* kAzureEndpointType = "azure.endpoint_type";
constexpr const char* kAzureAuthKey = "azure.auth_key";
constexpr const char* kAzureTriton = "triton";
constexpr const char* kAzureOpenAI = "openai";
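// Illustration added for this write-up, not part of the diff: with
// kAzureEndpointType set to kAzureOpenAI, a config along these lines would
// select the OpenAI invoker defined below. The URI and model name are
// hypothetical values for OpenAI's audio-transcription endpoint, and this
// assumes CloudEndPointConfig maps string keys to string values:
//
//   CloudEndPointConfig config{
//       {kAzureEndpointType, kAzureOpenAI},
//       {kAzureUri, "https://api.openai.com/v1/audio/transcriptions"},
//       {kAzureModelName, "whisper-1"},
//   };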

CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config,
const AllocatorPtr& allocator) : config_(config), allocator_(allocator) {
@@ -33,6 +36,175 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config,
}
}

class CurlGlobal {
public:
static void Init() {
static CurlGlobal curl_global;
}

private:
CurlGlobal() {
// Thread-safety is a must since curl might also be initialized in triton client.
const auto* info = curl_version_info(CURLVERSION_NOW);
ORT_ENFORCE(info->features & CURL_VERSION_THREADSAFE, "curl global init not thread-safe, need to upgrade curl version!");
ORT_ENFORCE(curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK, "Failed to initialize curl global env!");
@snnn (Member, May 10, 2023):
Even though curl_global_init is thread-safe, we still should not call it from any global variable's constructor, since at that point the static variables in libcurl itself might not have been initialized yet. So I think the best way to ensure this is to put the call in OrtEnv's constructor.

@RandySheriffH (Author, May 11, 2023):
I don't follow: how could curl's statics be uninitialized when OpenAIInvoker::OpenAIInvoker(...) runs, yet be safe from OrtEnv's constructor?

@snnn (Member):
Talked with Randy offline. The most difficult part is that this file also uses the Triton client library, which has similar code that initializes and deinitializes curl. I don't see an easy way to coordinate the two. Will leave the discussion for later.

}
~CurlGlobal() {
curl_global_cleanup();
}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlGlobal);
};
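A minimal sketch of the alternative snnn raises above: tie curl's global init and cleanup to a long-lived environment object instead of a function-local static. OrtEnvLike is a hypothetical stand-in for OrtEnv, not code from this PR.

struct OrtEnvLike {
  OrtEnvLike() {
    // Constructed explicitly by the API user after main() starts, so libcurl's
    // own statics are already initialized by the time this runs.
    ORT_ENFORCE(curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK,
                "Failed to initialize curl global env!");
  }
  ~OrtEnvLike() {
    curl_global_cleanup();
  }
  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OrtEnvLike);  // hypothetical, mirrors CurlGlobal above
};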

// OpenAIInvoker
class OpenAIInvoker : public CloudEndPointInvoker {
public:
OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator);
onnxruntime::Status Send(const CloudEndPointConfig& run_options,
const InlinedVector<std::string>& input_names,
gsl::span<const OrtValue> ort_inputs,
const InlinedVector<std::string>& output_names,
std::vector<OrtValue>& ort_outputs) const override;

private:
std::string uri_;
std::string model_name_;
};

OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config,
const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) {
CurlGlobal::Init();
ReadConfig(kAzureUri, uri_);
ReadConfig(kAzureModelName, model_name_);
}

struct StringBuffer {
StringBuffer() = default;
~StringBuffer() = default;
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(StringBuffer);
std::stringstream ss_;
@snnn (Member):
Here you need a std::string instead of a stringstream.

@RandySheriffH (Author):
A stringstream buffers extra space for incoming content, which is more suitable for this scenario.

@snnn (Member):
Doesn't std::string do the same thing? Every string has a size and a capacity: capacity is how much memory has been allocated for the string, and size is how much of it has been used.

@RandySheriffH (Author, May 11, 2023):
stringstream is designed for buffering content that grows dynamically.

};

// applies only when contents is a string
static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) {
size_t realsize = size * nmemb;
@snnn (Member):
C-style callbacks should not throw exceptions, since C code cannot handle C++ exceptions. All such callbacks need to be marked as non-throwing.

@RandySheriffH (Author, May 11, 2023):
If a C++ exception were thrown from the callback, wouldn't ORT be the catcher? It's certainly none of curl's business. Also, if an exception really did occur in the write callback, wouldn't it be most appropriate to propagate it to the upper logic, since the response is totally busted?

@RandySheriffH (Author):
Done. It now returns CURLE_WRITE_ERROR on exception.

auto buffer = reinterpret_cast<struct StringBuffer*>(userp);
buffer->ss_ << reinterpret_cast<const char*>(contents);
return realsize;
}
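A sketch of the direction the thread above (and the later commit "write sized data to stream") points to, assuming only standard iostream and libcurl semantics: write exactly size * nmemb bytes instead of relying on null termination, and convert any exception into a short return count, which curl reports to the caller as CURLE_WRITE_ERROR.

static size_t WriteStringCallbackSafe(void* contents, size_t size, size_t nmemb, void* userp) noexcept {
  const size_t realsize = size * nmemb;
  try {
    auto* buffer = static_cast<StringBuffer*>(userp);
    // The payload is not null-terminated; copy exactly realsize bytes.
    buffer->ss_.write(static_cast<const char*>(contents), realsize);
    return realsize;
  } catch (...) {
    return 0;  // any count other than realsize makes curl fail with CURLE_WRITE_ERROR
  }
}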

using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*);

class CurlHandler {
public:
CurlHandler(CurlWriteCallBack call_back) {
curl_ = curl_easy_init();
curl_easy_setopt(curl_, CURLOPT_BUFFERSIZE, 102400L);
curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L);
curl_easy_setopt(curl_, CURLOPT_USERAGENT, "curl/7.83.1");
curl_easy_setopt(curl_, CURLOPT_MAXREDIRS, 50L);
curl_easy_setopt(curl_, CURLOPT_FTP_SKIP_PASV_IP, 1L);
curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L);
curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, call_back);
}
~CurlHandler() {
if (curl_) {
curl_easy_cleanup(curl_);
curl_ = {};
}
if (mime1_) {
curl_mime_free(mime1_);
mime1_ = {};
}
if (headers_) {
curl_slist_free_all(headers_);
headers_ = {};
}
if (from_) {
curl_formfree(from_);
from_ = {};
}
}
void AddHeader(const char* data) {
headers_ = curl_slist_append(headers_, data);
}
template <typename... Args>
void AddForm(Args... args) {
curl_formadd(&from_, &last_, args...);
}
template <typename T>
void SetOption(CURLoption opt, T val) {
curl_easy_setopt(curl_, opt, val);
}
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler);
CURLcode Perform() {
SetOption(CURLOPT_HTTPHEADER, headers_);
SetOption(CURLOPT_HTTPPOST, from_);
return curl_easy_perform(curl_);
}

private:
CURL* curl_{};
@snnn (Member):
You may change it to a std::unique_ptr. See https://stackoverflow.com/questions/27440953/stdunique-ptr-for-c-functions-that-need-free for an example; instead of using std::free to free the memory, in your case you would use curl_easy_cleanup.

@RandySheriffH (Author, May 10, 2023):
That's fine, but we still have to call curl_easy_cleanup in the destructor either way.

@snnn (Member):
std::unique_ptr is safer than raw pointers, so your current code is not wrong, but using std::unique_ptr could make the code easier to read and verify. Just a suggestion; you don't have to take it.

@RandySheriffH (Author):
Done.


curl_mime* mime1_{};
struct curl_slist* headers_{};
struct curl_httppost* from_{};
struct curl_httppost* last_{};
};
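For reference, a sketch of the std::unique_ptr pattern suggested in the thread above, using only libcurl's own cleanup functions as deleters; aliases like these would replace the manual if/cleanup pairs in ~CurlHandler().

using CurlEasyPtr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
using CurlSlistPtr = std::unique_ptr<curl_slist, decltype(&curl_slist_free_all)>;
using CurlFormPtr = std::unique_ptr<curl_httppost, decltype(&curl_formfree)>;

// e.g. as a member: CurlEasyPtr curl_{curl_easy_init(), curl_easy_cleanup};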

onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options,
const InlinedVector<std::string>& /*input_names*/,
gsl::span<const OrtValue> ort_inputs,
const InlinedVector<std::string>& /*output_names*/,
std::vector<OrtValue>& ort_outputs) const {
const auto auth_key_iter = run_options.find(kAzureAuthKey);
if (run_options.end() == auth_key_iter || auth_key_iter->second.empty()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"auth key must be specified for openai client");
}
long verbose = 0;
const auto verbose_iter = run_options.find(kAzureVerbose);
if (run_options.end() != verbose_iter) {
verbose = verbose_iter->second != "0" ? 1L : 0L;
}

CURLcode ret{};
CurlHandler curl_handler(WriteStringCallback);
StringBuffer string_buffer;

std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second;
curl_handler.AddHeader(full_auth.c_str());
curl_handler.AddHeader("Content-Type: multipart/form-data");

const auto& tensor = ort_inputs[0].Get<Tensor>();
auto data_size = tensor.SizeInBytes();
curl_handler.AddForm(CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END);
curl_handler.AddForm(CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END);
curl_handler.AddForm(CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(),
CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END);

curl_handler.SetOption(CURLOPT_URL, uri_.c_str());
curl_handler.SetOption(CURLOPT_VERBOSE, verbose);
curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer);

ret = curl_handler.Perform();
if (ret != CURLE_OK) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(ret));
}

auto output_tensor = std::make_unique<Tensor>(onnxruntime::DataTypeImpl::GetType<std::string>(), TensorShape{1}, allocator_);
if (!output_tensor) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor");
}

auto* output_string = output_tensor->MutableData<std::string>();
*output_string = string_buffer.ss_.str();

ort_outputs.resize(1);
@snnn (Member):
Can we just emplace_back output_tensor into the vector?

@RandySheriffH (Author):
Done.

auto tensor_type = DataTypeImpl::GetType<Tensor>();
ort_outputs[0].Init(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc());
return Status::OK();
}
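A sketch of the emplace_back variant discussed above, reusing the Init call from the diff; the later commit "clear fetch before emplace" suggests the merged code clears the vector first. Illustrative, not the final merged code.

ort_outputs.clear();
auto tensor_type = DataTypeImpl::GetType<Tensor>();
OrtValue ort_value;
ort_value.Init(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc());
ort_outputs.emplace_back(std::move(ort_value));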

// AzureTritonInvoker
class AzureTritonInvoker : public CloudEndPointInvoker {
public:
AzureTritonInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator);
@@ -287,6 +459,9 @@ Status CloudEndPointInvoker::CreateInvoker(const CloudEndPointConfig& config,
if (iter->second == kAzureTriton) {
invoker = std::make_unique<AzureTritonInvoker>(config, allocator);
return status;
} else if (iter->second == kAzureOpenAI) {
invoker = std::make_unique<OpenAIInvoker>(config, allocator);
return status;
} // else other endpoint types ...
}
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
@@ -24,6 +24,16 @@ parameters:
type: boolean
default: false

- name: NugetPackageSuffix
displayName: Suffix to append to nuget package
type: string
default: ''

- name: AdditionalBuildFlag
displayName: Build flags to append to build command
type: string
default: ''

resources:
repositories:
- repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step
@@ -43,9 +53,9 @@ stages:
DoCompliance: ${{ parameters.DoCompliance }}
DoEsrp: ${{ parameters.DoEsrp }}
IsReleaseBuild: ${{ parameters.IsReleaseBuild }}
OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime'
OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime${{ parameters.NugetPackageSuffix }}'
AdditionalBuildFlags: ''
AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos'
AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos ${{parameters.AdditionalBuildFlag}}'
BuildVariant: 'default'

- template: templates/ondevice-training-cpu-packaging-pipeline.yml
23 changes: 22 additions & 1 deletion tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
@@ -190,4 +190,25 @@
ORT_EP_NAME: CPU
GenerateDocumentation: false
WITH_CACHE: true
MachinePool: 'onnxruntime-Win2019-CPU-training-AMD'
MachinePool: 'onnxruntime-Win2019-CPU-training-AMD'

- stage: x64_release_azure
dependsOn: []
jobs:
- template: templates/win-ci-vs-2019.yml
parameters:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env_azure.bat
buildArch: x64
additionalBuildFlags: --use_azure
msbuildPlatform: x64
isX86: false
job_name_suffix: x64_release_azure
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
RunStaticCodeAnalysis: false
EnablePython: false
isTraining: false
ORT_EP_NAME: CPU
GenerateDocumentation: false
WITH_CACHE: true
MachinePool: 'onnxruntime-Win-CPU-2019'