diff --git a/.gitignore b/.gitignore
index dc105c0c..c6580ca7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@ Cargo.lock
 
 # directory used to store images
 data
+
+.env
+.idea
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..abdcb70a
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "openapi/openai-openapi"]
+	path = openapi/openai-openapi
+	url = git@github.com:openai/openai-openapi.git
diff --git a/async-openai/src/chat.rs b/async-openai/src/chat.rs
index 7efa83b3..d5001cfa 100644
--- a/async-openai/src/chat.rs
+++ b/async-openai/src/chat.rs
@@ -27,7 +27,11 @@ impl<'c, C: Config> Chat<'c, C> {
                 "When stream is true, use Chat::create_stream".into(),
             ));
         }
-        self.client.post("/chat/completions", request).await
+        if request.data_sources.is_none() {
+            self.client.post("/chat/completions", request).await
+        } else {
+            self.client.post("/extensions/chat/completions", request).await
+        }
     }
 
     /// Creates a completion for the chat message
diff --git a/async-openai/src/client.rs b/async-openai/src/client.rs
index 024ca523..c122247e 100644
--- a/async-openai/src/client.rs
+++ b/async-openai/src/client.rs
@@ -1,6 +1,9 @@
+use std::fmt::{Display, Formatter};
 use std::pin::Pin;
+use std::str::from_utf8;
 
 use futures::{stream::StreamExt, Stream};
+use reqwest::header::{HeaderValue, CONTENT_TYPE};
 use reqwest_eventsource::{Event, EventSource, RequestBuilderExt};
 use serde::{de::DeserializeOwned, Serialize};
 
@@ -151,12 +155,38 @@ impl<C: Config> Client<C> {
         O: DeserializeOwned,
     {
         let request_maker = || async {
+            let url = self.config.url(path);
+            let query = &self.config.query();
+            let mut headers = self.config.headers();
+
+            let body = serde_json::to_vec(&request)?;
+            if !headers.contains_key(CONTENT_TYPE) {
+                headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
+            }
+
+            struct BodyDisplay {
+                body: Vec<u8>,
+            }
+            impl Display for BodyDisplay {
+                fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+                    match from_utf8(&self.body) {
+                        Ok(body) => f.write_str(body),
+                        Err(_) => f.write_str("Cannot display body"),
+                    }
+                }
+            }
+
+            tracing::debug!("url: {}", url);
+            tracing::debug!("query: {:?}", query);
+            tracing::debug!("headers: {:?}", headers);
+            tracing::debug!("body: {}", BodyDisplay { body: body.clone() });
+
             Ok(self
                 .http_client
-                .post(self.config.url(path))
-                .query(&self.config.query())
-                .headers(self.config.headers())
-                .json(&request)
+                .post(url)
+                .query(query)
+                .headers(headers)
+                .body(body)
                 .build()?)
         };
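The request body is now serialized eagerly so it can be logged through `tracing` before being handed to `reqwest`. Those `tracing::debug!` lines stay silent unless a subscriber is installed; a minimal sketch, assuming the optional `tracing-subscriber` crate (not a dependency added by this diff):

```rust
// Minimal sketch: install a subscriber at DEBUG level so the new
// url/query/headers/body lines emitted by Client::post become visible.
// Assumes the `tracing-subscriber` crate; it is not part of this diff.
fn init_debug_logging() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::DEBUG)
        .init();
}
```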
diff --git a/async-openai/src/error.rs b/async-openai/src/error.rs
index 57c9c7eb..8be13fe6 100644
--- a/async-openai/src/error.rs
+++ b/async-openai/src/error.rs
@@ -1,5 +1,6 @@
 //! Errors originating from API calls, parsing responses, and reading-or-writing to the file system.
 use serde::Deserialize;
+use serde_json::Error;
 
 #[derive(Debug, thiserror::Error)]
 pub enum OpenAIError {
@@ -12,6 +13,9 @@ pub enum OpenAIError {
     /// Error when a response cannot be deserialized into a Rust type
     #[error("failed to deserialize api response: {0}")]
     JSONDeserialize(serde_json::Error),
+    /// Error when a request cannot be serialized before being sent to the API
+    #[error("failed to serialize api request: {0}")]
+    JSONSerialize(serde_json::Error),
     /// Error on the client side when saving file to file system
     #[error("failed to save file: {0}")]
     FileSaveError(String),
@@ -49,3 +53,9 @@ pub(crate) fn map_deserialization_error(e: serde_json::Error, bytes: &[u8]) -> O
     );
     OpenAIError::JSONDeserialize(e)
 }
+
+impl From<Error> for OpenAIError {
+    fn from(value: Error) -> Self {
+        OpenAIError::JSONSerialize(value)
+    }
+}
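This `From` impl is what lets `Client::post` use `?` on `serde_json::to_vec` above: any `serde_json::Error` raised while encoding a request is converted into `OpenAIError::JSONSerialize`. A minimal sketch of the same pattern (`encode_body` is a hypothetical helper, not part of this diff):

```rust
use async_openai::error::OpenAIError;

// Hypothetical helper: `?` converts a serde_json::Error into
// OpenAIError::JSONSerialize via the From impl added above.
fn encode_body<T: serde::Serialize>(request: &T) -> Result<Vec<u8>, OpenAIError> {
    let bytes = serde_json::to_vec(request)?;
    Ok(bytes)
}
```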
diff --git a/async-openai/src/types/types.rs b/async-openai/src/types/types.rs
index 6f569cac..49ddf63d 100644
--- a/async-openai/src/types/types.rs
+++ b/async-openai/src/types/types.rs
@@ -762,6 +762,56 @@ pub struct ChatCompletionFunctions {
     pub parameters: Option<serde_json::Value>,
 }
 
+/// parameters:
+//   endpoint: https://mysearchexample.search.windows.net
+//   key: '***(admin key)'
+//   indexName: my-chunk-index
+//   fieldsMapping:
+//     titleField: productName
+//     urlField: productUrl
+//     filepathField: productFilePath
+//     contentFields:
+//     - productDescription
+//     contentFieldsSeparator: |2+
+//
+//   topNDocuments: 5
+//   queryType: semantic
+//   semanticConfiguration: defaultConfiguration
+//   inScope: true
+//   roleInformation: roleInformation
+#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
+#[builder(name = "AzureCognitiveSearchParametersArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+#[serde(rename_all = "camelCase")]
+pub struct AzureCognitiveSearchParameters {
+    pub endpoint: String,
+    pub key: String,
+    pub index_name: String,
+}
+
+#[derive(Clone, Serialize, Debug, Builder, Deserialize, PartialEq)]
+#[builder(name = "AzureCognitiveSearchDataSourceArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct AzureCognitiveSearchDataSource {
+    #[serde(rename = "type")]
+    pub _type: String,
+    pub parameters: AzureCognitiveSearchParameters,
+}
+impl Default for AzureCognitiveSearchDataSource {
+    fn default() -> Self {
+        Self {
+            _type: "AzureCognitiveSearch".to_string(),
+            parameters: Default::default(),
+        }
+    }
+}
+
 #[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
 #[builder(name = "CreateChatCompletionRequestArgs")]
 #[builder(pattern = "mutable")]
@@ -771,11 +821,17 @@ pub struct ChatCompletionFunctions {
 pub struct CreateChatCompletionRequest {
     /// ID of the model to use.
     /// See the [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.
+    #[serde(skip_serializing_if = "String::is_empty")]
     pub model: String,
 
     /// A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb).
     pub messages: Vec<ChatCompletionRequestMessage>, // min: 1
 
+    /// The data sources to be used for the Azure OpenAI on your data feature.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(rename = "dataSources")]
+    pub data_sources: Option<Vec<AzureCognitiveSearchDataSource>>,
+
     /// A list of functions the model may generate JSON inputs for.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub functions: Option<Vec<ChatCompletionFunctions>>,
@@ -820,6 +876,7 @@ pub struct CreateChatCompletionRequest {
     /// The maximum number of [tokens](https://platform.openai.com/tokenizer) to generate in the chat completion.
     ///
     /// The total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) for counting tokens.
+    #[serde(skip_serializing_if = "Option::is_none")]
     pub max_tokens: Option<u16>,
 
     /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
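With these types, a request that targets an Azure Cognitive Search index can be assembled through the generated builders; the custom `Default` impl fills in the `AzureCognitiveSearch` type tag. A minimal sketch (endpoint, key, and index name are placeholders):

```rust
use async_openai::types::{
    AzureCognitiveSearchDataSourceArgs, AzureCognitiveSearchParametersArgs,
    ChatCompletionRequestMessageArgs, CreateChatCompletionRequest,
    CreateChatCompletionRequestArgs, Role,
};

// Hypothetical helper: build a chat request wired to Azure Cognitive Search.
// All identifiers below are placeholders, not working credentials.
fn build_request() -> Result<CreateChatCompletionRequest, Box<dyn std::error::Error>> {
    let parameters = AzureCognitiveSearchParametersArgs::default()
        .endpoint("https://mysearchexample.search.windows.net")
        .key("***(admin key)")
        .index_name("my-chunk-index")
        .build()?;
    let data_source = AzureCognitiveSearchDataSourceArgs::default()
        // `_type` is left unset: the Default impl supplies "AzureCognitiveSearch".
        .parameters(parameters)
        .build()?;
    let request = CreateChatCompletionRequestArgs::default()
        .messages(vec![ChatCompletionRequestMessageArgs::default()
            .role(Role::User)
            .content("Where can I find a hiking place in Seattle?")
            .build()?])
        .data_sources(vec![data_source])
        .build()?;
    Ok(request)
}
```

Because `data_sources` is set, `Chat::create` routes this request to `/extensions/chat/completions` per the `chat.rs` change above.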
diff --git a/openapi/README.md b/openapi/README.md
new file mode 100644
index 00000000..270fe858
--- /dev/null
+++ b/openapi/README.md
@@ -0,0 +1,12 @@
+
+
+# [Official OpenAPI specification for OpenAI](https://github.com/openai/openai-openapi)
+
+The official repository is vendored as a git submodule in [openai-openapi](./openai-openapi).
+
+
+# [Official OpenAPI specification for Azure OpenAI](specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/)
+
+Versions:
+- [azure-openai-openapi 2023-09-01-preview.yaml](./azure-openai-openapi_2023-09-01-preview.yaml)
+
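The Azure endpoints described by the spec below are addressed per deployment and require an `api-version` query parameter; on the client side that maps onto `async-openai`'s existing `AzureConfig`. A minimal sketch (resource name, deployment id, and key are placeholders):

```rust
use async_openai::{config::AzureConfig, Client};

// Minimal sketch: point the client at an Azure OpenAI deployment using the
// preview api-version documented below. All identifiers are placeholders.
fn azure_client() -> Client<AzureConfig> {
    let config = AzureConfig::new()
        .with_api_base("https://your-resource-name.openai.azure.com")
        .with_deployment_id("my-gpt-35-turbo-deployment")
        .with_api_version("2023-09-01-preview")
        .with_api_key("<azure-api-key>");
    Client::with_config(config)
}
```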
diff --git a/openapi/azure-openai-openapi_2023-09-01-preview.yaml b/openapi/azure-openai-openapi_2023-09-01-preview.yaml
new file mode 100644
index 00000000..402c1731
--- /dev/null
+++ b/openapi/azure-openai-openapi_2023-09-01-preview.yaml
@@ -0,0 +1,1232 @@
+openapi: 3.0.0
+info:
+  title: Azure OpenAI Service API
+  description: Azure OpenAI APIs for completions and search
+  version: 2023-09-01-preview
+servers:
+- url: https://{endpoint}/openai
+  variables:
+    endpoint:
+      default: your-resource-name.openai.azure.com
+security:
+- bearer:
+  - api.read
+- apiKey: []
+paths:
+  /deployments/{deployment-id}/completions:
+    post:
+      summary: Creates a completion for the provided prompt, parameters and chosen model.
+      operationId: Completions_Create
+      parameters:
+      - in: path
+        name: deployment-id
+        required: true
+        schema:
+          type: string
+          example: davinci
+        description: Deployment id of the model which was deployed.
+      - in: query
+        name: api-version
+        required: true
+        schema:
+          type: string
+          example: 2023-09-01-preview
+        description: api version
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                prompt:
+                  description: |-
+                    The prompt(s) to generate completions for, encoded as a string or array of strings.
+                    Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048.
+                  oneOf:
+                  - type: string
+                    default: ''
+                    example: This is a test.
+                    nullable: true
+                  - type: array
+                    items:
+                      type: string
+                      default: ''
+                      example: This is a test.
+                      nullable: false
+                    description: Array size minimum of 1 and maximum of 2048
+                max_tokens:
+                  description: The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0.
+                  type: integer
+                  default: 16
+                  example: 16
+                  nullable: true
+                temperature:
+                  description: |-
+                    What sampling temperature to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.
+                    We generally recommend altering this or top_p but not both.
+                  type: number
+                  default: 1
+                  example: 1
+                  nullable: true
+                top_p:
+                  description: |-
+                    An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+                    We generally recommend altering this or temperature but not both.
+                  type: number
+                  default: 1
+                  example: 1
+                  nullable: true
+                logit_bias:
+                  description: Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256" : -100} to prevent the <|endoftext|> token from being generated.
+                  type: object
+                  nullable: false
+                user:
+                  description: A unique identifier representing your end-user, which can help monitoring and detecting abuse
+                  type: string
+                  nullable: false
+                n:
+                  description: |-
+                    How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed.
+                    Note: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop.
+                  type: integer
+                  default: 1
+                  example: 1
+                  nullable: true
+                stream:
+                  description: 'Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.'
+                  type: boolean
+                  nullable: true
+                  default: false
+                logprobs:
+                  description: |-
+                    Include the log probabilities on the logprobs most likely tokens, as well as the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response.
+                    Minimum of 0 and maximum of 5 allowed.
+                  type: integer
+                  default: null
+                  nullable: true
+                suffix:
+                  type: string
+                  nullable: true
+                  description: The suffix that comes after a completion of inserted text.
+                echo:
+                  description: Echo back the prompt in addition to the completion
+                  type: boolean
+                  default: false
+                  nullable: true
+                stop:
+                  description: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
+                  oneOf:
+                  - type: string
+                    default: <|endoftext|>
+                    example: |2+
+
+                    nullable: true
+                  - type: array
+                    items:
+                      type: string
+                      example: |2+
+
+                      nullable: false
+                    description: Array minimum size of 1 and maximum of 4
+                completion_config:
+                  type: string
+                  nullable: true
+                presence_penalty:
+                  description: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+                  type: number
+                  default: 0
+                frequency_penalty:
+                  description: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+                  type: number
+                  default: 0
+                best_of:
+                  description: |-
+                    Generates best_of completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed.
+                    When used with n, best_of controls the number of candidate completions and n specifies how many to return - best_of must be greater than n.
+                    Note: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has maximum value of 128.
+                  type: integer
+            example:
+              prompt: |-
+                Negate the following sentence.The price for bubblegum increased on thursday.
+
+                Negated Sentence:
+              max_tokens: 50
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  id:
+                    type: string
+                  object:
+                    type: string
+                  created:
+                    type: integer
+                  model:
+                    type: string
+                  prompt_filter_results:
+                    $ref: '#/components/schemas/promptFilterResults'
+                  choices:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        text:
+                          type: string
+                        index:
+                          type: integer
+                        logprobs:
+                          type: object
+                          properties:
+                            tokens:
+                              type: array
+                              items:
+                                type: string
+                            token_logprobs:
+                              type: array
+                              items:
+                                type: number
+                            top_logprobs:
+                              type: array
+                              items:
+                                type: object
+                                additionalProperties:
+                                  type: number
+                            text_offset:
+                              type: array
+                              items:
+                                type: integer
+                          nullable: true
+                        finish_reason:
+                          type: string
+                        content_filter_results:
+                          $ref: '#/components/schemas/contentFilterResults'
+                  usage:
+                    type: object
+                    properties:
+                      completion_tokens:
+                        type: number
+                        format: int32
+                      prompt_tokens:
+                        type: number
+                        format: int32
+                      total_tokens:
+                        type: number
+                        format: int32
+                    required:
+                    - prompt_tokens
+                    - total_tokens
+                    - completion_tokens
+                required:
+                - id
+                - object
+                - created
+                - model
+                - choices
+              example:
+                model: davinci
+                object: text_completion
+                id: cmpl-4509KAos68kxOqpE2uYGw81j6m7uo
+                created: 1637097562
+                choices:
+                - index: 0
+                  text: The price for bubblegum decreased on thursday.
+                  logprobs: null
+                  finish_reason: stop
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+        default:
+          description: Service unavailable
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/errorResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+  /deployments/{deployment-id}/embeddings:
+    post:
+      summary: Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
+      operationId: embeddings_create
+      parameters:
+      - in: path
+        name: deployment-id
+        required: true
+        schema:
+          type: string
+          example: ada-search-index-v1
+        description: The deployment id of the model which was deployed.
+      - in: query
+        name: api-version
+        required: true
+        schema:
+          type: string
+          example: 2023-09-01-preview
+        description: api version
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              additionalProperties: true
+              properties:
+                input:
+                  description: |-
+                    Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. Each input must not exceed 2048 tokens in length.
+                    Unless you are embedding code, we suggest replacing newlines (\n) in your input with a single space, as we have observed inferior results when newlines are present.
+                  oneOf:
+                  - type: string
+                    default: ''
+                    example: This is a test.
+                    nullable: true
+                  - type: array
+                    minItems: 1
+                    maxItems: 2048
+                    items:
+                      type: string
+                      minLength: 1
+                      example: This is a test.
+                      nullable: false
+                user:
+                  description: A unique identifier representing your end-user, which can help monitoring and detecting abuse.
+                  type: string
+                  nullable: false
+                input_type:
+                  description: input type of embedding search to use
+                  type: string
+                  example: query
+              required:
+              - input
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  object:
+                    type: string
+                  model:
+                    type: string
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        index:
+                          type: integer
+                        object:
+                          type: string
+                        embedding:
+                          type: array
+                          items:
+                            type: number
+                      required:
+                      - index
+                      - object
+                      - embedding
+                  usage:
+                    type: object
+                    properties:
+                      prompt_tokens:
+                        type: integer
+                      total_tokens:
+                        type: integer
+                    required:
+                    - prompt_tokens
+                    - total_tokens
+                required:
+                - object
+                - model
+                - data
+                - usage
+  /deployments/{deployment-id}/chat/completions:
+    post:
+      summary: Creates a completion for the chat message
+      operationId: ChatCompletions_Create
+      parameters:
+      - in: path
+        name: deployment-id
+        required: true
+        schema:
+          type: string
+        description: Deployment id of the model which was deployed.
+      - in: query
+        name: api-version
+        required: true
+        schema:
+          type: string
+          example: 2023-09-01-preview
+        description: api version
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/createChatCompletionRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/createChatCompletionResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+        default:
+          description: Service unavailable
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/errorResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+  /deployments/{deployment-id}/extensions/chat/completions:
+    post:
+      summary: Using extensions to create a completion for the chat messages.
+      operationId: ExtensionsChatCompletions_Create
+      parameters:
+      - in: path
+        name: deployment-id
+        required: true
+        schema:
+          type: string
+        description: Deployment id of the model which was deployed.
+      - in: query
+        name: api-version
+        required: true
+        schema:
+          type: string
+          example: 2023-09-01-preview
+        description: api version
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/extensionsChatCompletionsRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/extensionsChatCompletionsResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+        default:
+          description: Service unavailable
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/errorResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+  /deployments/{deployment-id}/audio/transcriptions:
+    post:
+      summary: Transcribes audio into the input language.
+      operationId: Transcriptions_Create
+      parameters:
+      - in: path
+        name: deployment-id
+        required: true
+        schema:
+          type: string
+          example: whisper
+        description: Deployment id of the whisper model.
+      - in: query
+        name: api-version
+        required: true
+        description: Api version.
+        schema:
+          type: string
+          example: 2023-05-20-preview
+          description: Api version.
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/createTranscriptionRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/audioResponse'
+                - $ref: '#/components/schemas/audioVerboseResponse'
+            text/plain:
+              schema:
+                type: string
+                description: Transcribed text in the output format (when response_format was one of text, vtt or srt).
+  /deployments/{deployment-id}/audio/translations:
+    post:
+      summary: Transcribes and translates input audio into English text.
+      operationId: Translations_Create
+      parameters:
+      - in: path
+        name: deployment-id
+        required: true
+        schema:
+          type: string
+          example: whisper
+        description: Deployment id of the whisper model which was deployed.
+      - in: query
+        name: api-version
+        required: true
+        description: Api version.
+        schema:
+          type: string
+          example: 2023-05-20-preview
+          description: Api version.
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/createTranslationRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/audioResponse'
+                - $ref: '#/components/schemas/audioVerboseResponse'
+            text/plain:
+              schema:
+                type: string
+                description: Transcribed text in the output format (when response_format was one of text, vtt or srt).
+components:
+  schemas:
+    errorResponse:
+      type: object
+      properties:
+        error:
+          $ref: '#/components/schemas/error'
+    errorBase:
+      type: object
+      properties:
+        code:
+          type: string
+        message:
+          type: string
+    error:
+      type: object
+      allOf:
+      - $ref: '#/components/schemas/errorBase'
+      properties:
+        code:
+          type: string
+        message:
+          type: string
+        param:
+          type: string
+        type:
+          type: string
+        inner_error:
+          $ref: '#/components/schemas/innerError'
+    innerError:
+      description: Inner error with additional details.
+      type: object
+      properties:
+        code:
+          $ref: '#/components/schemas/innerErrorCode'
+        content_filter_results:
+          $ref: '#/components/schemas/contentFilterResults'
+    innerErrorCode:
+      description: Error codes for the inner error object.
+      enum:
+      - ResponsibleAIPolicyViolation
+      type: string
+      x-ms-enum:
+        name: InnerErrorCode
+        modelAsString: true
+        values:
+        - value: ResponsibleAIPolicyViolation
+          description: The prompt violated one or more content filter rules.
+    contentFilterResult:
+      type: object
+      properties:
+        severity:
+          type: string
+          enum:
+          - safe
+          - low
+          - medium
+          - high
+          x-ms-enum:
+            name: ContentFilterSeverity
+            modelAsString: true
+            values:
+            - value: safe
+              description: General content or related content in generic or non-harmful contexts.
+            - value: low
+              description: Harmful content at a low intensity and risk level.
+            - value: medium
+              description: Harmful content at a medium intensity and risk level.
+            - value: high
+              description: Harmful content at a high intensity and risk level.
+        filtered:
+          type: boolean
+      required:
+      - severity
+      - filtered
+    contentFilterResults:
+      type: object
+      description: Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not.
+      properties:
+        sexual:
+          $ref: '#/components/schemas/contentFilterResult'
+        violence:
+          $ref: '#/components/schemas/contentFilterResult'
+        hate:
+          $ref: '#/components/schemas/contentFilterResult'
+        self_harm:
+          $ref: '#/components/schemas/contentFilterResult'
+        error:
+          $ref: '#/components/schemas/errorBase'
+    promptFilterResult:
+      type: object
+      description: Content filtering results for a single prompt in the request.
+      properties:
+        prompt_index:
+          type: integer
+        content_filter_results:
+          $ref: '#/components/schemas/contentFilterResults'
+    promptFilterResults:
+      type: array
+      description: Content filtering results for zero or more prompts in the request. In a streaming request, results for different prompts may arrive at different times or in different orders.
+      items:
+        $ref: '#/components/schemas/promptFilterResult'
+    chatCompletionsRequestCommon:
+      type: object
+      properties:
+        temperature:
+          description: |-
+            What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            We generally recommend altering this or `top_p` but not both.
+          type: number
+          minimum: 0
+          maximum: 2
+          default: 1
+          example: 1
+          nullable: true
+        top_p:
+          description: |-
+            An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+            We generally recommend altering this or `temperature` but not both.
+          type: number
+          minimum: 0
+          maximum: 1
+          default: 1
+          example: 1
+          nullable: true
+        stream:
+          description: 'If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.'
+          type: boolean
+          nullable: true
+          default: false
+        stop:
+          description: Up to 4 sequences where the API will stop generating further tokens.
+          oneOf:
+          - type: string
+            nullable: true
+          - type: array
+            items:
+              type: string
+              nullable: false
+            minItems: 1
+            maxItems: 4
+            description: Array minimum size of 1 and maximum of 4
+          default: null
+        max_tokens:
+          description: The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).
+          type: integer
+          default: 4096
+        presence_penalty:
+          description: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+          type: number
+          default: 0
+          minimum: -2
+          maximum: 2
+        frequency_penalty:
+          description: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+          type: number
+          default: 0
+          minimum: -2
+          maximum: 2
+        logit_bias:
+          description: Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
+          type: object
+          nullable: true
+        user:
+          description: A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.
+          type: string
+          example: user-1234
+          nullable: false
+    createChatCompletionRequest:
+      type: object
+      allOf:
+      - $ref: '#/components/schemas/chatCompletionsRequestCommon'
+      - properties:
+          messages:
+            description: A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb).
+            type: array
+            minItems: 1
+            items:
+              $ref: '#/components/schemas/chatCompletionRequestMessage'
+          functions:
+            description: A list of functions the model may generate JSON inputs for.
+            type: array
+            minItems: 1
+            items:
+              $ref: '#/components/schemas/chatCompletionFunctions'
+          function_call:
+            description: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via `{"name":\ "my_function"}` forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present.
+            oneOf:
+            - type: string
+              enum:
+              - none
+              - auto
+            - type: object
+              properties:
+                name:
+                  type: string
+                  description: The name of the function to call.
+              required:
+              - name
+          n:
+            type: integer
+            minimum: 1
+            maximum: 128
+            default: 1
+            example: 1
+            nullable: true
+            description: How many chat completion choices to generate for each input message.
+        required:
+        - messages
+    chatCompletionFunctions:
+      type: object
+      properties:
+        name:
+          type: string
+          description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
+        description:
+          type: string
+          description: The description of what the function does.
+        parameters:
+          $ref: '#/components/schemas/chatCompletionFunctionParameters'
+      required:
+      - name
+    chatCompletionFunctionParameters:
+      type: object
+      description: The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
+      additionalProperties: true
+    chatCompletionRequestMessage:
+      type: object
+      properties:
+        role:
+          type: string
+          enum:
+          - system
+          - user
+          - assistant
+          - function
+          description: The role of the messages author. One of `system`, `user`, `assistant`, or `function`.
+        content:
+          type: string
+          description: The contents of the message. `content` is required for all messages except assistant messages with function calls.
+        name:
+          type: string
+          description: The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters.
+        function_call:
+          type: object
+          description: The name and arguments of a function that should be called, as generated by the model.
+          properties:
+            name:
+              type: string
+              description: The name of the function to call.
+            arguments:
+              type: string
+              description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
+      required:
+      - role
+    createChatCompletionResponse:
+      type: object
+      allOf:
+      - $ref: '#/components/schemas/chatCompletionsResponseCommon'
+      - properties:
+          prompt_filter_results:
+            $ref: '#/components/schemas/promptFilterResults'
+          choices:
+            type: array
+            items:
+              type: object
+              allOf:
+              - $ref: '#/components/schemas/chatCompletionChoiceCommon'
+              - properties:
+                  message:
+                    $ref: '#/components/schemas/chatCompletionResponseMessage'
+                  content_filter_results:
+                    $ref: '#/components/schemas/contentFilterResults'
+      required:
+      - id
+      - object
+      - created
+      - model
+      - choices
+    chatCompletionResponseMessage:
+      type: object
+      properties:
+        role:
+          type: string
+          enum:
+          - system
+          - user
+          - assistant
+          - function
+          description: The role of the author of this message.
+        content:
+          type: string
+          description: The contents of the message.
+        function_call:
+          type: object
+          description: The name and arguments of a function that should be called, as generated by the model.
+          properties:
+            name:
+              type: string
+              description: The name of the function to call.
+            arguments:
+              type: string
+              description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
+      required:
+      - role
+    extensionsChatCompletionsRequest:
+      type: object
+      description: Request for the chat completions using extensions
+      required:
+      - messages
+      allOf:
+      - $ref: '#/components/schemas/chatCompletionsRequestCommon'
+      - properties:
+          messages:
+            type: array
+            items:
+              $ref: '#/components/schemas/message'
+          dataSources:
+            type: array
+            description: The data sources to be used for the Azure OpenAI on your data feature.
+            items:
+              $ref: '#/components/schemas/dataSource'
+      example:
+        dataSources:
+        - type: AzureCognitiveSearch
+          parameters:
+            endpoint: https://mysearchexample.search.windows.net
+            key: '***(admin key)'
+            indexName: my-chunk-index
+            fieldsMapping:
+              titleField: productName
+              urlField: productUrl
+              filepathField: productFilePath
+              contentFields:
+              - productDescription
+              contentFieldsSeparator: |2+
+
+            topNDocuments: 5
+            queryType: semantic
+            semanticConfiguration: defaultConfiguration
+            inScope: true
+            roleInformation: roleInformation
+        messages:
+        - role: user
+          content: Where can I find a hiking place in Seattle?
+        temperature: 0.9
+    dataSource:
+      type: object
+      description: The data source to be used for the Azure OpenAI on your data feature.
+      properties:
+        type:
+          type: string
+          description: The data source type.
+        parameters:
+          type: object
+          description: The parameters to be used for the data source in runtime.
+          additionalProperties: true
+      required:
+      - type
+    message:
+      type: object
+      description: A chat message.
+      properties:
+        index:
+          type: integer
+          description: The index of the message in the conversation.
+        role:
+          type: string
+          enum:
+          - system
+          - user
+          - assistant
+          - tool
+          description: The role of the author of this message.
+        recipient:
+          type: string
+          example: Contoso.productsUsingGET
+          description: The recipient of the message in the format of <namespace>.<operation>. Present if and only if the recipient is tool.
+        content:
+          type: string
+          description: The contents of the message
+        end_turn:
+          type: boolean
+          description: Whether the message ends the turn.
+        context:
+          type: object
+          description: The conversation context
+          nullable: true
+          properties:
+            messages:
+              type: array
+              description: Messages exchanged between model and extensions prior to final message from model
+              minItems: 1
+              items:
+                $ref: '#/components/schemas/message'
+              nullable: true
+      required:
+      - role
+      - content
+    chatCompletionsResponseCommon:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+        created:
+          type: integer
+          format: unixtime
+        model:
+          type: string
+        usage:
+          type: object
+          properties:
+            prompt_tokens:
+              type: integer
+            completion_tokens:
+              type: integer
+            total_tokens:
+              type: integer
+          required:
+          - prompt_tokens
+          - completion_tokens
+          - total_tokens
+      required:
+      - id
+      - object
+      - created
+      - model
+    chatCompletionChoiceCommon:
+      type: object
+      properties:
+        index:
+          type: integer
+        finish_reason:
+          type: string
+    extensionsChatCompletionChoice:
+      type: object
+      allOf:
+      - $ref: '#/components/schemas/chatCompletionChoiceCommon'
+      - properties:
+          message:
+            description: The message returned by the service.
+            $ref: '#/components/schemas/message'
+    extensionsChatCompletionsResponse:
+      type: object
+      description: The response of the extensions chat completions.
+      allOf:
+      - $ref: '#/components/schemas/chatCompletionsResponseCommon'
+      - properties:
+          choices:
+            type: array
+            items:
+              $ref: '#/components/schemas/extensionsChatCompletionChoice'
+      example:
+        id: '1'
+        object: extensions.chat.completion
+        created: 1679201802
+        model: gpt-3.5-turbo-0301
+        choices:
+        - index: 0
+          finish_reason: stop
+          message:
+            role: assistant
+            content: Seattle is a great place for hiking! Here are some of the best hiking places in Seattle according to Contoso Traveler [doc1] and West Coast Traveler, Snow Lake, Mount Si, and Mount Tenerife [doc2]. I hope this helps! Let me know if you need more information.
+            end_turn: true
+            context:
+              messages:
+              - role: tool
+                content: '{"citations":[{"filepath":"ContosoTraveler.pdf","content":"This is the content of the citation 1"},{"filepath":"WestCoastTraveler.html","content":"This is the content of the citation 2"},{"content":"This is the content of the citation 3 without filepath"}],"intent":"hiking place in seattle"}'
+                end_turn: false
+    createTranslationRequest:
+      type: object
+      description: Translation request.
+      properties:
+        file:
+          type: string
+          description: The audio file to translate.
+          format: binary
+        prompt:
+          type: string
+          description: An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English.
+        response_format:
+          $ref: '#/components/schemas/responseFormat'
+        temperature:
+          type: number
+          default: 0
+          description: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
+      required:
+      - file
+    audioResponse:
+      description: Translation or transcription response when response_format was json
+      type: object
+      properties:
+        text:
+          type: string
+          description: Translated or transcribed text.
+      required:
+      - text
+    audioVerboseResponse:
+      description: Translation or transcription response when response_format was verbose_json
+      type: object
+      allOf:
+      - $ref: '#/components/schemas/audioResponse'
+      - properties:
+          task:
+            type: string
+            description: Type of audio task.
+            enum:
+            - transcribe
+            - translate
+            x-ms-enum:
+              modelAsString: true
+          language:
+            type: string
+            description: Language.
+          duration:
+            type: number
+            description: Duration.
+          segments:
+            type: array
+            items:
+              $ref: '#/components/schemas/audioSegment'
+      required:
+      - text
+    responseFormat:
+      title: ResponseFormat
+      description: Defines the format of the output.
+      enum:
+      - json
+      - text
+      - srt
+      - verbose_json
+      - vtt
+      type: string
+      x-ms-enum:
+        modelAsString: true
+    createTranscriptionRequest:
+      type: object
+      description: Transcription request.
+      properties:
+        file:
+          type: string
+          description: The audio file object to transcribe.
+          format: binary
+        prompt:
+          type: string
+          description: An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
+        response_format:
+          $ref: '#/components/schemas/responseFormat'
+        temperature:
+          type: number
+          default: 0
+          description: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
+        language:
+          type: string
+          description: The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
+      required:
+      - file
+    audioSegment:
+      type: object
+      description: Transcription or translation segment.
+      properties:
+        id:
+          type: integer
+          description: Segment identifier.
+        seek:
+          type: number
+          description: Offset of the segment.
+        start:
+          type: number
+          description: Segment start offset.
+        end:
+          type: number
+          description: Segment end offset.
+        text:
+          type: string
+          description: Segment text.
+        tokens:
+          type: array
+          items:
+            type: number
+          nullable: false
+          description: Tokens of the text.
+        temperature:
+          type: number
+          description: Temperature.
+        avg_logprob:
+          type: number
+          description: Average log probability.
+        compression_ratio:
+          type: number
+          description: Compression ratio.
+        no_speech_prob:
+          type: number
+          description: Probability of 'no speech'.
+  securitySchemes:
+    bearer:
+      type: oauth2
+      flows:
+        implicit:
+          authorizationUrl: https://login.microsoftonline.com/common/oauth2/v2.0/authorize
+          scopes: {}
+      x-tokenInfoFunc: api.middleware.auth.bearer_auth
+      x-scopeValidateFunc: api.middleware.auth.validate_scopes
+    apiKey:
+      type: apiKey
+      name: api-key
+      in: header
\ No newline at end of file
diff --git a/openapi/openai-openapi b/openapi/openai-openapi
new file mode 160000
index 00000000..58fee495
--- /dev/null
+++ b/openapi/openai-openapi
@@ -0,0 +1 @@
+Subproject commit 58fee4955e93bb5762ec612126441330a044f5d2
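Putting the pieces together, an end-to-end call against the extensions endpoint could look like the sketch below, reusing the hypothetical `azure_client` and `build_request` helpers from the earlier sketches (and assuming a tokio runtime):

```rust
use async_openai::{config::AzureConfig, Client};

// Minimal end-to-end sketch: because the request sets `data_sources`,
// Chat::create posts to /extensions/chat/completions rather than
// /chat/completions.
async fn ask_with_search(client: &Client<AzureConfig>) -> Result<(), Box<dyn std::error::Error>> {
    let request = build_request()?; // hypothetical helper sketched earlier
    let response = client.chat().create(request).await?;
    if let Some(choice) = response.choices.first() {
        println!("{:?}", choice.message.content);
    }
    Ok(())
}
```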