From 898e440fef1e50a58b88751eeff4e87d75fd2a23 Mon Sep 17 00:00:00 2001
From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com>
Date: Thu, 11 May 2023 22:04:02 -0700
Subject: [PATCH] Implement openAI endpoint invoker for nuget (#15797)

Implement openAI audio endpoint, and enable nuget packaging.

---------

Co-authored-by: Randy Shuai <rashuai@microsoft.com>
---
 ThirdPartyNotices.txt                         |  29 +++
 cgmanifests/generated/cgmanifest.json         |  10 +
 cmake/deps.txt                                |   3 +-
 cmake/external/emsdk                          |   2 +-
 cmake/external/triton.cmake                   |  12 +-
 cmake/onnxruntime_framework.cmake             |   6 +-
 .../Microsoft.ML.OnnxRuntime.csproj           |   1 +
 .../SessionOptions.shared.cs                  |   4 +-
 .../main/java/ai/onnxruntime/OrtProvider.java |   3 +-
 onnxruntime/core/framework/cloud_invoker.cc   | 177 +++++++++++++++++-
 .../c-api-noopenmp-packaging-pipelines.yml    |  14 +-
 .../azure-pipelines/win-ci-pipeline.yml       |  23 ++-
 12 files changed, 259 insertions(+), 25 deletions(-)

diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt
index b4d981d42dfb8..e099840e64ce7 100644
--- a/ThirdPartyNotices.txt
+++ b/ThirdPartyNotices.txt
@@ -5993,3 +5993,32 @@ https://github.com/tensorflow/tfjs
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+——
+
+curl/curl
+
+https://github.com/curl
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (C) Daniel Stenberg, <daniel@haxx.se>, and many
+contributors, see the THANKS file.
+
+All rights reserved.
+
+Permission to use, copy, modify, and distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright
+notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
+NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not
+be used in advertising or otherwise to promote the sale, use or other dealings
+in this Software without prior written authorization of the copyright holder.
\ No newline at end of file
diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json
index bed8acc92cbd7..2054fd4b36262 100644
--- a/cgmanifests/generated/cgmanifest.json
+++ b/cgmanifests/generated/cgmanifest.json
@@ -448,5 +448,15 @@
         "comments": "extensions"
       }
     }
+    {
+      "component": {
+        "type": "git",
+        "git": {
+          "commitHash": "b16d1fa8ee567b52c09a0f89940b07d8491b881d",
+          "repositoryUrl": "https://github.com/curl/curl.git"
+        },
+        "comments": "curl"
+      }
+    }
   ]
 }
diff --git a/cmake/deps.txt b/cmake/deps.txt
index 58a7f416319c2..f940f1f65c6ce 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -49,4 +49,5 @@ pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0
 triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee
 # above are deps introduced by triton client, might remove after 1.14 release
 extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
-eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb
\ No newline at end of file
+eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb
+curl;https://github.com/curl/curl/archive/refs/tags/curl-8_0_1.zip;b16d1fa8ee567b52c09a0f89940b07d8491b881d
diff --git a/cmake/external/emsdk b/cmake/external/emsdk
index b113f24842c6e..0ab19024f08c6 160000
--- a/cmake/external/emsdk
+++ b/cmake/external/emsdk
@@ -1 +1 @@
-Subproject commit b113f24842c6e97fe3e352084db09a6e278593ae
+Subproject commit 0ab19024f08c6673a713e454ef8bd95e174c807f
diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake
index b24768bd89afd..9f9da0d8a6baf 100644
--- a/cmake/external/triton.cmake
+++ b/cmake/external/triton.cmake
@@ -44,13 +44,11 @@ if (WIN32)
   vcpkg_install(re2)
   vcpkg_install(boost-interprocess)
   vcpkg_install(boost-stacktrace)
-  vcpkg_install(zlib)
   vcpkg_install(pthread)
   vcpkg_install(b64)
 
   add_dependencies(getb64 getpthread)
-  add_dependencies(getpthread getzlib)
-  add_dependencies(getzlib getboost-stacktrace)
+  add_dependencies(getpthread getboost-stacktrace)
   add_dependencies(getboost-stacktrace getboost-interprocess)
   add_dependencies(getboost-interprocess getre2)
   add_dependencies(getre2 getrapidjson)
@@ -59,11 +57,11 @@ if (WIN32)
 
   ExternalProject_Add(triton
                       GIT_REPOSITORY https://github.com/triton-inference-server/client.git
-                      GIT_TAG r22.12
+                      GIT_TAG r23.05
                       PREFIX triton
                       SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src
                       BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build
-                      CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON
+                      CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF
                       INSTALL_COMMAND ""
                       UPDATE_COMMAND "")
 
@@ -85,11 +83,11 @@ else()
 
   ExternalProject_Add(triton
                       GIT_REPOSITORY https://github.com/triton-inference-server/client.git
-                      GIT_TAG r22.12
+                      GIT_TAG r23.05
                       PREFIX triton
                       SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src
                       BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build
-                      CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON
+                      CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_ZLIB=OFF
                       INSTALL_COMMAND ""
                       UPDATE_COMMAND "")
 
diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake
index 5c947a52b7838..fcbae6ad32c50 100644
--- a/cmake/onnxruntime_framework.cmake
+++ b/cmake/onnxruntime_framework.cmake
@@ -40,19 +40,19 @@ onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_src
 if (onnxruntime_USE_AZURE)
 
   add_dependencies(onnxruntime_framework triton)
-  target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include)
+  target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include ${TRITON_THIRD_PARTY}/curl/include)
   link_directories(${TRITON_BIN}/lib ${TRITON_BIN}/lib64 ${TRITON_THIRD_PARTY}/curl/lib ${TRITON_THIRD_PARTY}/curl/lib64)
 
   if (WIN32)
 
     link_directories(${VCPKG_SRC}/installed/${onnxruntime_target_platform}-windows/lib)
-    target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32 zlib)
+    target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32)
 
   else()
 
     find_package(ZLIB REQUIRED)
     find_package(OpenSSL REQUIRED)
-    target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl ZLIB::ZLIB OpenSSL::Crypto OpenSSL::SSL)
+    target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl OpenSSL::Crypto OpenSSL::SSL)
 
   endif() #if (WIN32)
 
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
index 39d90c55b7ba8..9a8cc269bff2d 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
@@ -41,6 +41,7 @@
   <!-- only set the .net6 targets if we're building an ORT package.
        we can add .net6 support to other packages later as needed -->
   <PropertyGroup Condition="('$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime' OR
+                             '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Azure' OR
                              '$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Gpu')">
     <Net6Targets>net6.0;net6.0-android;net6.0-ios;net6.0-macos</Net6Targets>
   </PropertyGroup>
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
index 6bbb159a3de37..30951bae3f9f9 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
@@ -375,10 +375,10 @@ public IntPtr Appender(IntPtr handle, IntPtr[] optKeys, IntPtr[] optValues, UInt
         /// <param name="providerOptions">Optional key/value pairs to specify execution provider options.</param>
         public void AppendExecutionProvider(string providerName, Dictionary<string, string> providerOptions = null)
         {
-            if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN")
+            if (providerName != "SNPE" && providerName != "XNNPACK" && providerName != "QNN" && providerName != "AZURE")
             {
                 throw new NotSupportedException(
-                    "Only QNN, SNPE and XNNPACK execution providers can be enabled by this method.");
+                    "Only QNN, SNPE, XNNPACK and AZURE execution providers can be enabled by this method.");
             }
 
             if (providerOptions == null)
diff --git a/java/src/main/java/ai/onnxruntime/OrtProvider.java b/java/src/main/java/ai/onnxruntime/OrtProvider.java
index cb35bf4f507f5..0da9487c675a2 100644
--- a/java/src/main/java/ai/onnxruntime/OrtProvider.java
+++ b/java/src/main/java/ai/onnxruntime/OrtProvider.java
@@ -23,7 +23,8 @@ public enum OrtProvider {
   ARM_NN("ArmNNExecutionProvider"),
   ROCM("ROCMExecutionProvider"),
   CORE_ML("CoreMLExecutionProvider"),
-  XNNPACK("XnnpackExecutionProvider");
+  XNNPACK("XnnpackExecutionProvider"),
+  AZURE("AzureExecutionProvider");
 
   private static final Map<String, OrtProvider> valueMap = new HashMap<>(values().length);
 
diff --git a/onnxruntime/core/framework/cloud_invoker.cc b/onnxruntime/core/framework/cloud_invoker.cc
index d6883d408f2e4..a2e9ec97cf931 100644
--- a/onnxruntime/core/framework/cloud_invoker.cc
+++ b/onnxruntime/core/framework/cloud_invoker.cc
@@ -2,7 +2,9 @@
 // Licensed under the MIT License.
 
 #ifdef USE_AZURE
+#define CURL_STATICLIB
 #include "http_client.h"
+#include "curl/curl.h"
 #include "core/common/common.h"
 #include "core/framework/cloud_invoker.h"
 #include "core/framework/ort_value.h"
@@ -18,13 +20,14 @@ namespace onnxruntime {
 
 namespace tc = triton::client;
 
-const char* kAzureUri = "azure.uri";
-const char* kAzureModelName = "azure.model_name";
-const char* kAzureModelVer = "azure.model_version";
-const char* kAzureVerbose = "azure.verbose";
-const char* kAzureEndpointType = "azure.endpoint_type";
-const char* kAzureAuthKey = "azure.auth_key";
-const char* kAzureTriton = "triton";
+constexpr const char* kAzureUri = "azure.uri";
+constexpr const char* kAzureModelName = "azure.model_name";
+constexpr const char* kAzureModelVer = "azure.model_version";
+constexpr const char* kAzureVerbose = "azure.verbose";
+constexpr const char* kAzureEndpointType = "azure.endpoint_type";
+constexpr const char* kAzureAuthKey = "azure.auth_key";
+constexpr const char* kAzureTriton = "triton";
+constexpr const char* kAzureOpenAI = "openai";
 
 CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config,
                                            const AllocatorPtr& allocator) : config_(config), allocator_(allocator) {
@@ -33,6 +36,163 @@ CloudEndPointInvoker::CloudEndPointInvoker(const CloudEndPointConfig& config,
   }
 }
 
+class CurlGlobal {
+ public:
+  static void Init() {
+    static CurlGlobal curl_global;
+  }
+
+ private:
+  CurlGlobal() {
+    // Thread-safety is a must since curl might also be initialized in triton client.
+    const auto* info = curl_version_info(CURLVERSION_NOW);
+    ORT_ENFORCE(info->features & CURL_VERSION_THREADSAFE, "curl global init not thread-safe, need to upgrade curl version!");
+    ORT_ENFORCE(curl_global_init(CURL_GLOBAL_DEFAULT) == CURLE_OK, "Failed to initialize curl global env!");
+  }
+  ~CurlGlobal() {
+    curl_global_cleanup();
+  }
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlGlobal);
+};
+
+// OpenAIInvoker
+class OpenAIInvoker : public CloudEndPointInvoker {
+ public:
+  OpenAIInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator);
+  onnxruntime::Status Send(const CloudEndPointConfig& run_options,
+                           const InlinedVector<std::string>& input_names,
+                           gsl::span<const OrtValue> ort_inputs,
+                           const InlinedVector<std::string>& output_names,
+                           std::vector<OrtValue>& ort_outputs) const override;
+
+ private:
+  std::string uri_;
+  std::string model_name_;
+};
+
+OpenAIInvoker::OpenAIInvoker(const CloudEndPointConfig& config,
+                             const AllocatorPtr& allocator) : CloudEndPointInvoker(config, allocator) {
+  CurlGlobal::Init();
+  ReadConfig(kAzureUri, uri_);
+  ReadConfig(kAzureModelName, model_name_);
+}
+
+struct StringBuffer {
+  StringBuffer() = default;
+  ~StringBuffer() = default;
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(StringBuffer);
+  std::stringstream ss_;
+};
+
+// apply the callback only when response is for sure to be a '/0' terminated string
+static size_t WriteStringCallback(void* contents, size_t size, size_t nmemb, void* userp) {
+  try {
+    size_t realsize = size * nmemb;
+    auto buffer = reinterpret_cast<struct StringBuffer*>(userp);
+    buffer->ss_.write(reinterpret_cast<const char*>(contents), realsize);
+    return realsize;
+  } catch (...) {
+    // exception caught, abort write
+    return CURLcode::CURLE_WRITE_ERROR;
+  }
+}
+
+using CurlWriteCallBack = size_t (*)(void*, size_t, size_t, void*);
+
+class CurlHandler {
+ public:
+  CurlHandler(CurlWriteCallBack call_back) : curl_(curl_easy_init(), curl_easy_cleanup),
+                                             headers_(nullptr, curl_slist_free_all),
+                                             from_holder_(from_, curl_formfree) {
+    curl_easy_setopt(curl_.get(), CURLOPT_BUFFERSIZE, 102400L);
+    curl_easy_setopt(curl_.get(), CURLOPT_NOPROGRESS, 1L);
+    curl_easy_setopt(curl_.get(), CURLOPT_USERAGENT, "curl/7.83.1");
+    curl_easy_setopt(curl_.get(), CURLOPT_MAXREDIRS, 50L);
+    curl_easy_setopt(curl_.get(), CURLOPT_FTP_SKIP_PASV_IP, 1L);
+    curl_easy_setopt(curl_.get(), CURLOPT_TCP_KEEPALIVE, 1L);
+    curl_easy_setopt(curl_.get(), CURLOPT_WRITEFUNCTION, call_back);
+  }
+  ~CurlHandler() = default;
+
+  void AddHeader(const char* data) {
+    headers_.reset(curl_slist_append(headers_.release(), data));
+  }
+  template <typename... Args>
+  void AddForm(Args... args) {
+    curl_formadd(&from_, &last_, args...);
+  }
+  template <typename T>
+  void SetOption(CURLoption opt, T val) {
+    curl_easy_setopt(curl_.get(), opt, val);
+  }
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CurlHandler);
+  CURLcode Perform() {
+    SetOption(CURLOPT_HTTPHEADER, headers_.get());
+    SetOption(CURLOPT_HTTPPOST, from_);
+    return curl_easy_perform(curl_.get());
+  }
+
+ private:
+  std::unique_ptr<CURL, decltype(curl_easy_cleanup)*> curl_;
+  std::unique_ptr<curl_slist, decltype(curl_slist_free_all)*> headers_;
+  curl_httppost* from_{};
+  curl_httppost* last_{};
+  std::unique_ptr<curl_httppost, decltype(curl_formfree)*> from_holder_;
+};
+
+onnxruntime::Status OpenAIInvoker::Send(const CloudEndPointConfig& run_options,
+                                        const InlinedVector<std::string>& /*input_names*/,
+                                        gsl::span<const OrtValue> ort_inputs,
+                                        const InlinedVector<std::string>& /*output_names*/,
+                                        std::vector<OrtValue>& ort_outputs) const {
+  const auto auth_key_iter = run_options.find(kAzureAuthKey);
+  if (run_options.end() == auth_key_iter || auth_key_iter->second.empty()) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "auth key must be specified for openai client");
+  }
+  long verbose = 0;
+  const auto verbose_iter = run_options.find(kAzureVerbose);
+  if (run_options.end() != verbose_iter) {
+    verbose = verbose_iter->second != "0" ? 1L : 0L;
+  }
+
+  CurlHandler curl_handler(WriteStringCallback);
+  StringBuffer string_buffer;
+
+  std::string full_auth = std::string{"Authorization: Bearer "} + auth_key_iter->second;
+  curl_handler.AddHeader(full_auth.c_str());
+  curl_handler.AddHeader("Content-Type: multipart/form-data");
+
+  const auto& tensor = ort_inputs[0].Get<Tensor>();
+  auto data_size = tensor.SizeInBytes();
+  curl_handler.AddForm(CURLFORM_COPYNAME, "model", CURLFORM_COPYCONTENTS, model_name_.c_str(), CURLFORM_END);
+  curl_handler.AddForm(CURLFORM_COPYNAME, "response_format", CURLFORM_COPYCONTENTS, "text", CURLFORM_END);
+  curl_handler.AddForm(CURLFORM_COPYNAME, "file", CURLFORM_BUFFER, "non_exist.wav", CURLFORM_BUFFERPTR, tensor.DataRaw(),
+                       CURLFORM_BUFFERLENGTH, data_size, CURLFORM_END);
+
+  curl_handler.SetOption(CURLOPT_URL, uri_.c_str());
+  curl_handler.SetOption(CURLOPT_VERBOSE, verbose);
+  curl_handler.SetOption(CURLOPT_WRITEDATA, (void*)&string_buffer);
+
+  auto curl_ret = curl_handler.Perform();
+  if (CURLE_OK != curl_ret) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, curl_easy_strerror(curl_ret));
+  }
+
+  auto output_tensor = std::make_unique<Tensor>(onnxruntime::DataTypeImpl::GetType<std::string>(), TensorShape{1}, allocator_);
+  if (!output_tensor) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor");
+  }
+
+  auto* output_string = output_tensor->MutableData<std::string>();
+  *output_string = string_buffer.ss_.str();
+  auto tensor_type = DataTypeImpl::GetType<Tensor>();
+  ort_outputs.clear();
+  ort_outputs.emplace_back(output_tensor.release(), tensor_type, tensor_type->GetDeleteFunc());
+  return Status::OK();
+}
+
+// AzureTritonInvoker
 class AzureTritonInvoker : public CloudEndPointInvoker {
  public:
   AzureTritonInvoker(const CloudEndPointConfig& config, const AllocatorPtr& allocator);
@@ -287,6 +447,9 @@ Status CloudEndPointInvoker::CreateInvoker(const CloudEndPointConfig& config,
       if (iter->second == kAzureTriton) {
         invoker = std::make_unique<AzureTritonInvoker>(config, allocator);
         return status;
+      } else if (iter->second == kAzureOpenAI) {
+        invoker = std::make_unique<OpenAIInvoker>(config, allocator);
+        return status;
       }  // else other endpoint types ...
     }
     status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
index 3d64c2c67cc8e..b4516b4359821 100644
--- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
+++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml
@@ -50,6 +50,16 @@ parameters:
   type: string
   default: '0'
 
+- name: NugetPackageSuffix
+  displayName: Suffix to append to nuget package
+  type: string
+  default: ''
+
+- name: AdditionalBuildFlag
+  displayName: Build flags to append to build command
+  type: string
+  default: ''
+
 resources:
   repositories:
   - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step
@@ -100,9 +110,9 @@ stages:
     DoCompliance: ${{ parameters.DoCompliance }}
     DoEsrp: ${{ parameters.DoEsrp }}
     IsReleaseBuild: ${{ parameters.IsReleaseBuild }}
-    OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime'
+    OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime${{ parameters.NugetPackageSuffix }}'
     AdditionalBuildFlags: ''
-    AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos'
+    AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos ${{parameters.AdditionalBuildFlag}}'
     BuildVariant: 'default'
     SpecificArtifact: ${{ parameters.SpecificArtifact }}
     BuildId: ${{ parameters.BuildId }}
diff --git a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
index 2c42ad4693ef4..708ea540d445b 100644
--- a/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml
@@ -190,4 +190,25 @@ stages:
         ORT_EP_NAME: CPU
         GenerateDocumentation: false
         WITH_CACHE: true
-        MachinePool: 'onnxruntime-Win2019-CPU-training-AMD'
\ No newline at end of file
+        MachinePool: 'onnxruntime-Win2019-CPU-training-AMD'
+
+- stage: x64_release_azure
+  dependsOn: []
+  jobs:
+    - template: templates/win-ci-vs-2019.yml
+      parameters:
+        BuildConfig: 'RelWithDebInfo'
+        EnvSetupScript: setup_env_azure.bat
+        buildArch: x64
+        additionalBuildFlags: --use_azure
+        msbuildPlatform: x64
+        isX86: false
+        job_name_suffix: x64_release_azure
+        RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
+        RunStaticCodeAnalysis: false
+        EnablePython: false
+        isTraining: false
+        ORT_EP_NAME: CPU
+        GenerateDocumentation: false
+        WITH_CACHE: true
+        MachinePool: 'onnxruntime-Win-CPU-2019'
\ No newline at end of file