From 1dd2b8c403c3f443309f6c5c770c4c17a11a56fc Mon Sep 17 00:00:00 2001 From: JayaniH Date: Wed, 18 Oct 2023 16:26:20 +0530 Subject: [PATCH] Update azure.openai.chat connector --- openapi/azure.openai.chat/Ballerina.toml | 2 +- openapi/azure.openai.chat/Module.md | 57 +- openapi/azure.openai.chat/Package.md | 4 +- openapi/azure.openai.chat/client.bal | 28 +- openapi/azure.openai.chat/openapi.yaml | 718 +++++-- .../azure.openai.chat/original-openapi.yaml | 1748 +++++++++-------- openapi/azure.openai.chat/types.bal | 231 ++- openapi/azure.openai.chat/utils.bal | 3 + 8 files changed, 1740 insertions(+), 1051 deletions(-) diff --git a/openapi/azure.openai.chat/Ballerina.toml b/openapi/azure.openai.chat/Ballerina.toml index 745b18fe4..d8a2070ba 100644 --- a/openapi/azure.openai.chat/Ballerina.toml +++ b/openapi/azure.openai.chat/Ballerina.toml @@ -6,7 +6,7 @@ name = "azure.openai.chat" icon = "icon.png" distribution = "2201.4.1" repository = "https://github.com/ballerina-platform/openapi-connectors/tree/main/openapi/azure.openai.chat" -version = "1.0.2" +version = "2.0.0" authors = ["Ballerina"] [build-options] observabilityIncluded = true diff --git a/openapi/azure.openai.chat/Module.md b/openapi/azure.openai.chat/Module.md index 58ab5b082..53380115d 100644 --- a/openapi/azure.openai.chat/Module.md +++ b/openapi/azure.openai.chat/Module.md @@ -38,7 +38,7 @@ Create and initialize a `chat:Client` with the obtained `apiKey` and a `serviceU >**Note:** These operations are in the form of remote operations. - Following is an example of creating a conversation with an OpenAI gpt-35-turbo model: + Following is an example of creating a conversation with an Azure OpenAI chat model: ```ballerina public function main() returns error? { @@ -48,14 +48,65 @@ Create and initialize a `chat:Client` with the obtained `apiKey` and a `serviceU serviceUrl = serviceUrl ); - chat:Chat_completions_body chatBody = { + chat:CreateChatCompletionRequest chatBody = { messages: [{role: "user", content: "What is Ballerina?"}] }; - chat:Inline_response_200 chatResult = check chatClient->/deployments/["chat"]/chat/completions.post("2023-03-15-preview", chatBody); + chat:CreateChatCompletionResponse chatResult = check chatClient->/deployments/["chat"]/chat/completions.post("2023-08-01-preview", chatBody); io:println(chatResult); } ``` + Following is a sample of using function calling with an Azure OpenAI chat model: + + ```ballerina + public function main() returns error? { + + final chat:Client chatClient = check new ( + config = {auth: {apiKey: apiKey}}, + serviceUrl = serviceUrl + ); + + chat:ChatCompletionRequestMessage[] messages = [{role: "user", content: "What is the weather in Seattle?"}]; + + chat:ChatCompletionFunctions[] functions = [ + { + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city or town to get the weather for" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"] + } + }, + "required": ["location"] + } + } + ]; + + chat:CreateChatCompletionRequest chatBody = {messages, functions}; + + chat:CreateChatCompletionResponse chatResult = check chatClient->/deployments/["chat"]/chat/completions.post("2023-08-01-preview", chatBody); + + io:println(chatResult); + + chat:ChatCompletionRequestMessage_function_call? 
functionCall = chatResult.choices[0].message?.function_call;
+
+        if functionCall is chat:ChatCompletionRequestMessage_function_call {
+            messages.push({role: "assistant", content: (), function_call: functionCall});
+
+            // Invoke the function [functionCall.name] with the arguments [functionCall.arguments] and get the output [functionOutput]
+
+            messages.push({role: "function", name: functionCall.name, content: functionOutput.toString()});
+        }
+    }
+    ```
+
 2. Use the `bal run` command to compile and run the Ballerina program.
\ No newline at end of file
diff --git a/openapi/azure.openai.chat/Package.md b/openapi/azure.openai.chat/Package.md
index 93f06b221..686b1b7f0 100644
--- a/openapi/azure.openai.chat/Package.md
+++ b/openapi/azure.openai.chat/Package.md
@@ -2,10 +2,10 @@ Connects to [Azure OpenAI Chat Completions API](https://learn.microsoft.com/en-u
 ### Package overview
-The `azure.openai.chat` is a [Ballerina](https://ballerina.io/) connector for connecting to the Azure OpenAI Service REST API Chat Completions Endpoint.
+The `azure.openai.chat` package is a [Ballerina](https://ballerina.io/) connector for connecting to the Azure OpenAI Service REST API Chat Completions and Chat Completions Extensions endpoints.
 
 #### Compatibility
-Azure OpenAI Service REST API: v2023-03-15-preview
+Azure OpenAI Service REST API: v2023-08-01-preview
 
 ## Report issues
 To report bugs, request new features, start new discussions, view project boards, etc., go to the [Ballerina Extended Library repository](https://github.com/ballerina-platform/ballerina-extended-library).
diff --git a/openapi/azure.openai.chat/client.bal b/openapi/azure.openai.chat/client.bal
index ee3e0a37c..96178567d 100644
--- a/openapi/azure.openai.chat/client.bal
+++ b/openapi/azure.openai.chat/client.bal
@@ -1,6 +1,9 @@
+// AUTO-GENERATED FILE. DO NOT MODIFY.
+// This file is auto-generated by the Ballerina OpenAPI tool.
+
 import ballerina/http;
 
-# This is a generated connector from [Azure OpenAI Chat Completions API v2023-03-15-preview](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#chat-completions/) OpenAPI specification.
+# This is a generated connector from [Azure OpenAI Chat Completions API v2023-08-01-preview](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#chat-completions/) OpenAPI specification.
 # The Azure OpenAI Service REST API Chat Completions Endpoint will create completions for chat messages with the ChatGPT (preview) and GPT-4 (preview) models.
@display {label: "Azure OpenAI Chat", iconPath: "icon.png"}
public isolated client class Client {
@@ -50,7 +53,7 @@ public isolated client class Client {
     #
     # + return - OK
     @display {label: "Create Chat Completion"}
-    resource isolated function post deployments/[string deploymentId]/chat/completions(string apiVersion, Chat_completions_body payload) returns Inline_response_200|error {
+    resource isolated function post deployments/[string deploymentId]/chat/completions(string apiVersion, CreateChatCompletionRequest payload) returns CreateChatCompletionResponse|error {
         string resourcePath = string `/deployments/${getEncodedUri(deploymentId)}/chat/completions`;
         map<any> headerValues = {};
         map<anydata> queryParam = {"api-version": apiVersion};
@@ -62,7 +65,26 @@
         http:Request request = new;
         json jsonBody = payload.toJson();
         request.setPayload(jsonBody, "application/json");
-        Inline_response_200 response = check self.clientEp->post(resourcePath, request, httpHeaders);
+        CreateChatCompletionResponse response = check self.clientEp->post(resourcePath, request, httpHeaders);
+        return response;
+    }
+    # Using extensions to create a completion for the chat messages.
+    #
+    # + return - OK
+    @display {label: "Create Extensions Chat Completion"}
+    resource isolated function post deployments/[string deploymentId]/extensions/chat/completions(string apiVersion, ExtensionsChatCompletionsRequest payload) returns ExtensionsChatCompletionsResponse|error {
+        string resourcePath = string `/deployments/${getEncodedUri(deploymentId)}/extensions/chat/completions`;
+        map<any> headerValues = {};
+        map<anydata> queryParam = {"api-version": apiVersion};
+        if self.apiKeyConfig is ApiKeysConfig {
+            headerValues["api-key"] = self.apiKeyConfig?.apiKey;
+        }
+        resourcePath = resourcePath + check getPathForQueryParam(queryParam);
+        map<string|string[]> httpHeaders = getMapForHeaders(headerValues);
+        http:Request request = new;
+        json jsonBody = payload.toJson();
+        request.setPayload(jsonBody, "application/json");
+        ExtensionsChatCompletionsResponse response = check self.clientEp->post(resourcePath, request, httpHeaders);
         return response;
     }
 }
diff --git a/openapi/azure.openai.chat/openapi.yaml b/openapi/azure.openai.chat/openapi.yaml
index 362f3a21d..b34be8100 100644
--- a/openapi/azure.openai.chat/openapi.yaml
+++ b/openapi/azure.openai.chat/openapi.yaml
@@ -5,14 +5,14 @@ info:
     iconPath: "icon.png"
   title: Azure OpenAI Service API
   description: >
-    This is a generated connector from [Azure OpenAI Chat Completions API v2023-03-15-preview](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#chat-completions/) OpenAPI specification.
+    This is a generated connector from [Azure OpenAI Chat Completions API v2023-08-01-preview](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#chat-completions/) OpenAPI specification.
 
     The Azure OpenAI Service REST API Chat Completions Endpoint will create completions for chat messages with the ChatGPT (preview) and GPT-4 (preview) models.
   x-ballerina-init-description: >
     The connector initialization requires setting the API credentials.
 
    Create an [Azure](https://azure.microsoft.com/en-us/features/azure-portal/) account, an [Azure OpenAI resource with a model deployed](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource), and refer to [this guide](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#authentication) to learn how to generate and use tokens
Create an [Azure](https://azure.microsoft.com/en-us/features/azure-portal/) account, an [Azure OpenAI resource with a model deployed](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource) and refer [this guide](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#authentication) to learn how to generate and use tokens - version: 2023-03-15-preview + version: 2023-08-01-preview servers: - url: https://{endpoint}/openai variables: @@ -41,210 +41,562 @@ paths: required: true schema: type: string - example: 2023-03-15-preview + example: 2023-08-01-preview description: api version requestBody: required: true content: application/json: schema: - type: object - properties: - messages: - description: The messages to generate chat completions for, in the chat format. - type: array - minItems: 1 - items: - type: object - properties: - role: - type: string - enum: - - system - - user - - assistant - description: The role of the author of this message. - content: - type: string - description: The contents of the message - name: - type: string - description: The name of the user in a multi-user chat - required: - - role - - content - temperature: - description: |- - What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - We generally recommend altering this or `top_p` but not both. - type: number - minimum: 0 - maximum: 2 - default: 1 - example: 1 - nullable: true - top_p: - description: |- - An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. - We generally recommend altering this or `temperature` but not both. - type: number - minimum: 0 - maximum: 1 - default: 1 - example: 1 - nullable: true - 'n': - description: How many chat completion choices to generate for each input message. - type: integer - minimum: 1 - maximum: 128 - default: 1 - example: 1 - nullable: true - stream: - description: 'If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.' - type: boolean - nullable: true - default: false - stop: - description: Up to 4 sequences where the API will stop generating further tokens. - oneOf: - - type: string - nullable: true - - type: array - items: - type: string - nullable: false - minItems: 1 - maxItems: 4 - description: Array minimum size of 1 and maximum of 4 - default: null - max_tokens: - description: The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens). - type: integer - default: inf - presence_penalty: - description: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - type: number - default: 0 - minimum: -2 - maximum: 2 - frequency_penalty: - description: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. 
- type: number - default: 0 - minimum: -2 - maximum: 2 - logit_bias: - description: Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. - type: object - nullable: true - user: - description: A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse. - type: string - example: user-1234 - nullable: false - required: - - messages - example: - model: gpt-35-turbo - messages: - - role: user - content: Hello! + $ref: '#/components/schemas/createChatCompletionRequest' responses: '200': description: OK content: application/json: schema: - type: object - properties: - id: - type: string - object: - type: string - created: - type: integer - format: unixtime - model: - type: string - choices: - type: array - items: - type: object - properties: - index: - type: integer - message: - type: object - properties: - role: - type: string - enum: - - system - - user - - assistant - description: The role of the author of this message. - content: - type: string - description: The contents of the message - required: - - role - - content - finish_reason: - type: string - usage: - type: object - properties: - prompt_tokens: - type: integer - completion_tokens: - type: integer - total_tokens: - type: integer - required: - - prompt_tokens - - completion_tokens - - total_tokens - required: - - id - - object - - created - - model - - choices - example: - id: chatcmpl-123 - object: chat.completion - created: 1677652288 - choices: - - index: 0 - message: - role: assistant - content: |- - - - Hello there, how may I assist you today? - finish_reason: stop - usage: - prompt_tokens: 9 - completion_tokens: 12 - total_tokens: 21 + $ref: '#/components/schemas/createChatCompletionResponse' + headers: + apim-request-id: + description: Request ID for troubleshooting purposes + schema: + type: string + default: + description: Service unavailable + content: + application/json: + schema: + $ref: '#/components/schemas/errorResponse' + headers: + apim-request-id: + description: Request ID for troubleshooting purposes + schema: + type: string + /deployments/{deployment-id}/extensions/chat/completions: + post: + x-ballerina-display: + label: Create Extensions Chat Completion + summary: Using extensions to creates a completion for the chat messages. + operationId: ExtensionsChatCompletions_Create + parameters: + - in: path + name: deployment-id + required: true + schema: + type: string + description: Deployment id of the model which was deployed. 
+ - in: query + name: api-version + required: true + schema: + type: string + example: 2023-08-01-preview + description: api version + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/extensionsChatCompletionsRequest' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/extensionsChatCompletionsResponse' + headers: + apim-request-id: + description: Request ID for troubleshooting purposes + schema: + type: string + default: + description: Service unavailable + content: + application/json: + schema: + $ref: '#/components/schemas/errorResponse' + headers: + apim-request-id: + description: Request ID for troubleshooting purposes + schema: + type: string components: schemas: errorResponse: type: object properties: error: + $ref: '#/components/schemas/error' + errorBase: + type: object + description: Error information returned by the service. + properties: + code: + type: string + description: The error code. + message: + type: string + description: The error message. + error: + type: object + allOf: + - $ref: '#/components/schemas/errorBase' + properties: + code: + type: string + message: + type: string + param: + type: string + type: + type: string + inner_error: + $ref: '#/components/schemas/innerError' + innerError: + description: Inner error with additional details. + type: object + properties: + code: + $ref: '#/components/schemas/innerErrorCode' + content_filter_results: + $ref: '#/components/schemas/contentFilterResults' + innerErrorCode: + description: Error codes for the inner error object. + enum: + - ResponsibleAIPolicyViolation + type: string + x-ms-enum: + name: InnerErrorCode + modelAsString: true + values: + - value: ResponsibleAIPolicyViolation + description: The prompt violated one of more content filter rules. + contentFilterResult: + type: object + description: Information about the content filtering category including the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + properties: + severity: + type: string + description: The severity level of the content filter result. + enum: + - safe + - low + - medium + - high + x-ms-enum: + name: ContentFilterSeverity + modelAsString: true + values: + - value: safe + description: General content or related content in generic or non-harmful contexts. + - value: low + description: Harmful content at a low intensity and risk level. + - value: medium + description: Harmful content at a medium intensity and risk level. + - value: high + description: Harmful content at a high intensity and risk level. + filtered: + type: boolean + description: Whether the content filter result has been filtered or not. + required: + - severity + - filtered + contentFilterResults: + type: object + description: Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. 
+ properties: + sexual: + $ref: '#/components/schemas/contentFilterResult' + violence: + $ref: '#/components/schemas/contentFilterResult' + hate: + $ref: '#/components/schemas/contentFilterResult' + self_harm: + $ref: '#/components/schemas/contentFilterResult' + error: + $ref: '#/components/schemas/errorBase' + promptFilterResult: + type: object + description: Content filtering results for a single prompt in the request. + properties: + prompt_index: + description: The index of the prompt in the set of prompt results. + type: integer + content_filter_results: + $ref: '#/components/schemas/contentFilterResults' + promptFilterResults: + type: array + description: Content filtering results for zero or more prompts in the request. In a streaming request, results for different prompts may arrive at different times or in different orders. + items: + $ref: '#/components/schemas/promptFilterResult' + chatCompletionsRequestCommon: + type: object + properties: + temperature: + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. + We generally recommend altering this or `top_p` but not both. + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + top_p: + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + We generally recommend altering this or `temperature` but not both. + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + stream: + description: 'If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.' + type: boolean + nullable: true + default: false + stop: + description: Up to 4 sequences where the API will stop generating further tokens. + oneOf: + - type: string + nullable: true + - type: array + items: + type: string + nullable: false + minItems: 1 + maxItems: 4 + description: Array minimum size of 1 and maximum of 4 + default: null + max_tokens: + description: The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens). + type: integer + default: 4096 + presence_penalty: + description: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + type: number + default: 0 + minimum: -2 + maximum: 2 + frequency_penalty: + description: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + type: number + default: 0 + minimum: -2 + maximum: 2 + logit_bias: + description: Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. 
The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. type: object + nullable: true + user: + description: A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse. + type: string + example: user-1234 + nullable: false + createChatCompletionRequest: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionsRequestCommon' + - properties: + messages: + description: A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + type: array + minItems: 1 + items: + $ref: '#/components/schemas/chatCompletionRequestMessage' + functions: + description: A list of functions the model may generate JSON inputs for. + type: array + minItems: 1 + items: + $ref: '#/components/schemas/chatCompletionFunctions' + function_call: + description: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via `{"name":\ "my_function"}` forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. + oneOf: + - type: string + enum: + - none + - auto + - type: object + properties: + name: + type: string + description: The name of the function to call. + required: + - name + 'n': + type: integer + minimum: 1 + maximum: 128 + default: 1 + example: 1 + nullable: true + description: How many chat completion choices to generate for each input message. + required: + - messages + chatCompletionFunctions: + type: object + properties: + name: + type: string + description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. + description: + type: string + description: The description of what the function does. + parameters: + $ref: '#/components/schemas/chatCompletionFunctionParameters' + required: + - name + chatCompletionFunctionParameters: + type: object + description: The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. + additionalProperties: true + chatCompletionRequestMessage: + type: object + properties: + role: + type: string + enum: + - system + - user + - assistant + - function + description: The role of the messages author. One of `system`, `user`, `assistant`, or `function`. + content: + type: string + description: The contents of the message. `content` is required for all messages except assistant messages with function calls. + nullable: true + name: + type: string + description: The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters. + function_call: + type: object + description: The name and arguments of a function that should be called, as generated by the model. 
properties: - code: + name: type: string - message: + description: The name of the function to call. + arguments: type: string - param: + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + required: + - role + - content + createChatCompletionResponse: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionsResponseCommon' + - properties: + choices: + type: array + items: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionChoiceCommon' + - properties: + message: + $ref: '#/components/schemas/chatCompletionResponseMessage' + content_filter_results: + $ref: '#/components/schemas/contentFilterResults' + required: + - choices + chatCompletionResponseMessage: + type: object + properties: + role: + type: string + enum: + - system + - user + - assistant + - function + description: The role of the author of this message. + content: + type: string + description: The contents of the message. + function_call: + type: object + description: The name and arguments of a function that should be called, as generated by the model. + properties: + name: type: string - type: + description: The name of the function to call. + arguments: type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + required: + - role + extensionsChatCompletionsRequest: + type: object + description: Request for the chat completions using extensions + allOf: + - $ref: '#/components/schemas/chatCompletionsRequestCommon' + - properties: + messages: + type: array + description: A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + items: + $ref: '#/components/schemas/message' + dataSources: + type: array + description: The data sources to be used for the Azure OpenAI on your data feature. + items: + $ref: '#/components/schemas/dataSource' + required: + - messages + example: + dataSources: + - type: AzureCognitiveSearch + parameters: + endpoint: https://mysearchexample.search.windows.net + key: '***(admin key)' + indexName: my-chunk-index + fieldsMapping: + titleField: productName + urlField: productUrl + filepathField: productFilePath + contentFields: + - productDescription + contentFieldsSeparator: |+ + + topNDocuments: 5 + queryType: semantic + semanticConfiguration: defaultConfiguration + inScope: true + roleInformation: roleInformation + messages: + - role: user + content: Where can I find a hiking place in Seattle? + temperature: 0.9 + dataSource: + type: object + description: The data source to be used for the Azure OpenAI on your data feature. + properties: + type: + type: string + description: The data source type. + parameters: + type: object + description: The parameters to be used for the data source in runtime. + additionalProperties: true + required: + - type + message: + type: object + description: A chat message. + properties: + index: + type: integer + description: The index of the message in the conversation. 
          role:
            type: string
            enum:
              - system
              - user
              - assistant
              - tool
            description: The role of the author of this message.
          recipient:
            type: string
            example: Contoso.productsUsingGET
            description: The recipient of the message in the format of <namespace>.<operation>. Present if and only if the recipient is tool.
          content:
            type: string
            description: The contents of the message
          end_turn:
            type: boolean
            description: Whether the message ends the turn.
          context:
            type: object
            description: The conversation context
            nullable: true
            properties:
              messages:
                type: array
                description: Messages exchanged between model and extensions prior to final message from model
                minItems: 1
                items:
                  $ref: '#/components/schemas/message'
                nullable: true
        required:
          - role
          - content
      chatCompletionsResponseCommon:
        type: object
        properties:
          id:
            type: string
          object:
            type: string
          created:
            type: integer
            format: unixtime
          model:
            type: string
          usage:
            type: object
            properties:
              prompt_tokens:
                type: integer
              completion_tokens:
                type: integer
              total_tokens:
                type: integer
            required:
              - prompt_tokens
              - completion_tokens
              - total_tokens
        required:
          - id
          - object
          - created
          - model
      chatCompletionChoiceCommon:
        type: object
        properties:
          index:
            type: integer
          finish_reason:
            type: string
      extensionsChatCompletionChoice:
        type: object
        allOf:
          - $ref: '#/components/schemas/chatCompletionChoiceCommon'
          - properties:
              message:
                description: The message returned by the service.
                $ref: '#/components/schemas/message'
      extensionsChatCompletionsResponse:
        type: object
        description: The response of the extensions chat completions.
        allOf:
          - $ref: '#/components/schemas/chatCompletionsResponseCommon'
          - properties:
              choices:
                type: array
                description: A list of chat completion choices.
                items:
                  $ref: '#/components/schemas/extensionsChatCompletionChoice'
        example:
          id: '1'
          object: extensions.chat.completion
          created: 1679201802
          model: gpt-3.5-turbo-0301
          choices:
            - index: 0
              finish_reason: stop
              message:
                role: assistant
                content: Seattle is a great place for hiking! Here are some of the best hiking places in Seattle according to Contoso Traveler [doc1] and West Coast Traveler, Snow Lake, Mount Si, and Mount Tenerife [doc2]. I hope this helps! Let me know if you need more information.
+ end_turn: true + context: + messages: + - role: tool + content: '{"citations":[{"filepath":"ContosoTraveler.pdf","content":"This is the content of the citation 1"},{"filepath":"WestCoastTraveler.html","content":"This is the content of the citation 2"},{"content":"This is the content of the citation 3 without filepath"}],"intent":"hiking place in seattle"}' + end_turn: false securitySchemes: bearer: type: oauth2 @@ -258,4 +610,4 @@ components: type: apiKey description: The key used to access the OpenAI APIs name: api-key - in: header \ No newline at end of file + in: header diff --git a/openapi/azure.openai.chat/original-openapi.yaml b/openapi/azure.openai.chat/original-openapi.yaml index 2578561ea..15277878b 100644 --- a/openapi/azure.openai.chat/original-openapi.yaml +++ b/openapi/azure.openai.chat/original-openapi.yaml @@ -1,817 +1,931 @@ -{ - "openapi": "3.0.0", - "info": { - "title": "Azure OpenAI Service API", - "description": "Azure OpenAI APIs for completions and search", - "version": "2023-03-15-preview" - }, - "servers": [ - { - "url": "https://{endpoint}/openai", - "variables": { - "endpoint": { - "default": "your-resource-name.openai.azure.com" - } - } - } - ], - "security": [ - { - "bearer": [ - "api.read" - ] - }, - { - "apiKey": [] - } - ], - "paths": { - "/deployments/{deployment-id}/completions": { - "post": { - "summary": "Creates a completion for the provided prompt, parameters and chosen model.", - "operationId": "Completions_Create", - "parameters": [ - { - "in": "path", - "name": "deployment-id", - "required": true, - "schema": { - "type": "string", - "example": "davinci", - "description": "Deployment id of the model which was deployed." - } - }, - { - "in": "query", - "name": "api-version", - "required": true, - "schema": { - "type": "string", - "example": "2023-03-15-preview", - "description": "api version" - } - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "prompt": { - "description": "The prompt(s) to generate completions for, encoded as a string or array of strings.\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048.", - "oneOf": [ - { - "type": "string", - "default": "", - "example": "This is a test.", - "nullable": true - }, - { - "type": "array", - "items": { - "type": "string", - "default": "", - "example": "This is a test.", - "nullable": false - }, - "description": "Array size minimum of 1 and maximum of 2048" - } - ] - }, - "max_tokens": { - "description": "The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0.", - "type": "integer", - "default": 16, - "example": 16, - "nullable": true - }, - "temperature": { - "description": "What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.\nWe generally recommend altering this or top_p but not both.", - "type": "number", - "default": 1, - "example": 1, - "nullable": true - }, - "top_p": { - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.", - "type": "number", - "default": 1, - "example": 1, - "nullable": true - }, - "logit_bias": { - "description": "Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {\"50256\" : -100} to prevent the <|endoftext|> token from being generated.", - "type": "object", - "nullable": false - }, - "user": { - "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse", - "type": "string", - "nullable": false - }, - "n": { - "description": "How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop.", - "type": "integer", - "default": 1, - "example": 1, - "nullable": true - }, - "stream": { - "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.", - "type": "boolean", - "nullable": true, - "default": false - }, - "logprobs": { - "description": "Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response.\nMinimum of 0 and maximum of 5 allowed.", - "type": "integer", - "default": null, - "nullable": true - }, - "model": { - "type": "string", - "example": "davinci", - "nullable": true, - "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them." - }, - "suffix": { - "type": "string", - "nullable": true, - "description": "The suffix that comes after a completion of inserted text." - }, - "echo": { - "description": "Echo back the prompt in addition to the completion", - "type": "boolean", - "default": false, - "nullable": true - }, - "stop": { - "description": "Up to 4 sequences where the API will stop generating further tokens. 
The returned text will not contain the stop sequence.", - "oneOf": [ - { - "type": "string", - "default": "<|endoftext|>", - "example": "\n", - "nullable": true - }, - { - "type": "array", - "items": { - "type": "string", - "example": [ - "\n" - ], - "nullable": false - }, - "description": "Array minimum size of 1 and maximum of 4" - } - ] - }, - "completion_config": { - "type": "string", - "nullable": true - }, - "cache_level": { - "description": "can be used to disable any server-side caching, 0=no cache, 1=prompt prefix enabled, 2=full cache", - "type": "integer", - "nullable": true - }, - "presence_penalty": { - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "type": "number", - "default": 0 - }, - "frequency_penalty": { - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "type": "number", - "default": 0 - }, - "best_of": { - "description": "Generates best_of completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\nWhen used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has maximum value of 128.", - "type": "integer" - } - } - }, - "example": { - "prompt": "Negate the following sentence.The price for bubblegum increased on thursday.\n\n Negated Sentence:", - "max_tokens": 50 - } - } - } - }, - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string" - }, - "created": { - "type": "integer" - }, - "model": { - "type": "string" - }, - "choices": { - "type": "array", - "items": { - "type": "object", - "properties": { - "text": { - "type": "string" - }, - "index": { - "type": "integer" - }, - "logprobs": { - "type": "object", - "nullable": true, - "properties": { - "tokens": { - "type": "array", - "items": { - "type": "string" - } - }, - "token_logprobs": { - "type": "array", - "items": { - "type": "number" - } - }, - "top_logprobs": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "type": "number" - } - } - }, - "text_offset": { - "type": "array", - "items": { - "type": "integer" - } - } - } - }, - "finish_reason": { - "type": "string" - } - } - } - }, - "usage": { - "type": "object", - "properties": { - "completion_tokens": { - "type": "number", - "format": "integer" - }, - "prompt_tokens": { - "type": "number", - "format": "integer" - }, - "total_tokens": { - "type": "number", - "format": "integer" - } - }, - "required": [ - "prompt_tokens", - "total_tokens", - "completion_tokens" - ] - } - }, - "required": [ - "id", - "object", - "created", - "model", - "choices" - ] - }, - "example": { - "model": "davinci", - "object": "text_completion", - "id": "cmpl-4509KAos68kxOqpE2uYGw81j6m7uo", - "created": 1637097562, - "choices": [ - { - "index": 0, - "text": "The price for bubblegum decreased on thursday.", - "logprobs": null, - "finish_reason": "stop" - } - ] - } - } - }, - 
"headers": { - "apim-request-id": { - "description": "Request ID for troubleshooting purposes", - "schema": { - "type": "string" - } - } - } - }, - "default": { - "description": "Service unavailable", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/errorResponse" - } - } - }, - "headers": { - "apim-request-id": { - "description": "Request ID for troubleshooting purposes", - "schema": { - "type": "string" - } - } - } - } - } - } - }, - "/deployments/{deployment-id}/embeddings": { - "post": { - "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", - "operationId": "embeddings_create", - "parameters": [ - { - "in": "path", - "name": "deployment-id", - "required": true, - "schema": { - "type": "string", - "example": "ada-search-index-v1" - }, - "description": "The deployment id of the model which was deployed." - }, - { - "in": "query", - "name": "api-version", - "required": true, - "schema": { - "type": "string", - "example": "2023-03-15-preview", - "description": "api version" - } - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "additionalProperties": true, - "properties": { - "input": { - "description": "Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. Each input must not exceed 2048 tokens in length.\nUnless you are embedding code, we suggest replacing newlines (\\n) in your input with a single space, as we have observed inferior results when newlines are present.", - "oneOf": [ - { - "type": "string", - "default": "", - "example": "This is a test.", - "nullable": true - }, - { - "type": "array", - "minItems": 1, - "maxItems": 2048, - "items": { - "type": "string", - "minLength": 1, - "example": "This is a test.", - "nullable": false - } - } - ] - }, - "user": { - "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse.", - "type": "string", - "nullable": false - }, - "input_type": { - "description": "input type of embedding search to use", - "type": "string", - "example": "query" - }, - "model": { - "type": "string", - "description": "ID of the model to use. 
You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them.", - "nullable": false - } - }, - "required": [ - "input" - ] - } - } - } - }, - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "object": { - "type": "string" - }, - "model": { - "type": "string" - }, - "data": { - "type": "array", - "items": { - "type": "object", - "properties": { - "index": { - "type": "integer" - }, - "object": { - "type": "string" - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - } - } - }, - "required": [ - "index", - "object", - "embedding" - ] - } - }, - "usage": { - "type": "object", - "properties": { - "prompt_tokens": { - "type": "integer" - }, - "total_tokens": { - "type": "integer" - } - }, - "required": [ - "prompt_tokens", - "total_tokens" - ] - } - }, - "required": [ - "object", - "model", - "data", - "usage" - ] - } - } - } - } - } - } - }, - "/deployments/{deployment-id}/chat/completions": { - "post": { - "summary": "Creates a completion for the chat message", - "operationId": "ChatCompletions_Create", - "parameters": [ - { - "in": "path", - "name": "deployment-id", - "required": true, - "schema": { - "type": "string", - "description": "Deployment id of the model which was deployed." - } - }, - { - "in": "query", - "name": "api-version", - "required": true, - "schema": { - "type": "string", - "example": "2023-03-15-preview", - "description": "api version" - } - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "messages": { - "description": "The messages to generate chat completions for, in the chat format.", - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "properties": { - "role": { - "type": "string", - "enum": [ - "system", - "user", - "assistant" - ], - "description": "The role of the author of this message." - }, - "content": { - "type": "string", - "description": "The contents of the message" - }, - "name": { - "type": "string", - "description": "The name of the user in a multi-user chat" - } - }, - "required": [ - "role", - "content" - ] - } - }, - "temperature": { - "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\nWe generally recommend altering this or `top_p` but not both.", - "type": "number", - "minimum": 0, - "maximum": 2, - "default": 1, - "example": 1, - "nullable": true - }, - "top_p": { - "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or `temperature` but not both.", - "type": "number", - "minimum": 0, - "maximum": 1, - "default": 1, - "example": 1, - "nullable": true - }, - "n": { - "description": "How many chat completion choices to generate for each input message.", - "type": "integer", - "minimum": 1, - "maximum": 128, - "default": 1, - "example": 1, - "nullable": true - }, - "stream": { - "description": "If set, partial message deltas will be sent, like in ChatGPT. 
Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.", - "type": "boolean", - "nullable": true, - "default": false - }, - "stop": { - "description": "Up to 4 sequences where the API will stop generating further tokens.", - "oneOf": [ - { - "type": "string", - "nullable": true - }, - { - "type": "array", - "items": { - "type": "string", - "nullable": false - }, - "minItems": 1, - "maxItems": 4, - "description": "Array minimum size of 1 and maximum of 4" - } - ], - "default": null - }, - "max_tokens": { - "description": "The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).", - "type": "integer", - "default": "inf" - }, - "presence_penalty": { - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - "type": "number", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "frequency_penalty": { - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - "type": "number", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "logit_bias": { - "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.", - "type": "object", - "nullable": true - }, - "user": { - "description": "A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.", - "type": "string", - "example": "user-1234", - "nullable": false - } - }, - "required": [ - "messages" - ] - }, - "example": { - "model": "gpt-35-turbo", - "messages": [ - { - "role": "user", - "content": "Hello!" - } - ] - } - } - } - }, - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "object": { - "type": "string" - }, - "created": { - "type": "integer", - "format": "unixtime" - }, - "model": { - "type": "string" - }, - "choices": { - "type": "array", - "items": { - "type": "object", - "properties": { - "index": { - "type": "integer" - }, - "message": { - "type": "object", - "properties": { - "role": { - "type": "string", - "enum": [ - "system", - "user", - "assistant" - ], - "description": "The role of the author of this message." 
- }, - "content": { - "type": "string", - "description": "The contents of the message" - } - }, - "required": [ - "role", - "content" - ] - }, - "finish_reason": { - "type": "string" - } - } - } - }, - "usage": { - "type": "object", - "properties": { - "prompt_tokens": { - "type": "integer" - }, - "completion_tokens": { - "type": "integer" - }, - "total_tokens": { - "type": "integer" - } - }, - "required": [ - "prompt_tokens", - "completion_tokens", - "total_tokens" - ] - } - }, - "required": [ - "id", - "object", - "created", - "model", - "choices" - ] - }, - "example": { - "id": "chatcmpl-123", - "object": "chat.completion", - "created": 1677652288, - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "\n\nHello there, how may I assist you today?" - }, - "finish_reason": "stop" - } - ], - "usage": { - "prompt_tokens": 9, - "completion_tokens": 12, - "total_tokens": 21 - } - } - } - } - } - } - } - } - }, - "components": { - "schemas": { - "errorResponse": { - "type": "object", - "properties": { - "error": { - "type": "object", - "properties": { - "code": { - "type": "string" - }, - "message": { - "type": "string" - }, - "param": { - "type": "string" - }, - "type": { - "type": "string" - } - } - } - } - } - }, - "securitySchemes": { - "bearer": { - "type": "oauth2", - "flows": { - "implicit": { - "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", - "scopes": {} - } - }, - "x-tokenInfoFunc": "api.middleware.auth.bearer_auth", - "x-scopeValidateFunc": "api.middleware.auth.validate_scopes" - }, - "apiKey": { - "type": "apiKey", - "name": "api-key", - "in": "header" - } - } - } -} +openapi: 3.0.0 +info: + title: Azure OpenAI Service API + description: Azure OpenAI APIs for completions and search + version: 2023-08-01-preview +servers: + - url: https://{endpoint}/openai + variables: + endpoint: + default: your-resource-name.openai.azure.com +security: + - bearer: + - api.read + - apiKey: [] +paths: + /deployments/{deployment-id}/completions: + post: + summary: Creates a completion for the provided prompt, parameters and chosen model. + operationId: Completions_Create + parameters: + - in: path + name: deployment-id + required: true + schema: + type: string + example: davinci + description: Deployment id of the model which was deployed. + - in: query + name: api-version + required: true + schema: + type: string + example: 2023-08-01-preview + description: api version + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + prompt: + description: |- + The prompt(s) to generate completions for, encoded as a string or array of strings. + Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048. + oneOf: + - type: string + default: '' + example: This is a test. + nullable: true + - type: array + items: + type: string + default: '' + example: This is a test. + nullable: false + description: Array size minimum of 1 and maximum of 2048 + max_tokens: + description: The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0. + type: integer + default: 16 + example: 16 + nullable: true + temperature: + description: |- + What sampling temperature to use. 
Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + We generally recommend altering this or top_p but not both. + type: number + default: 1 + example: 1 + nullable: true + top_p: + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + We generally recommend altering this or temperature but not both. + type: number + default: 1 + example: 1 + nullable: true + logit_bias: + description: Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256" : -100} to prevent the <|endoftext|> token from being generated. + type: object + nullable: false + user: + description: A unique identifier representing your end-user, which can help monitoring and detecting abuse + type: string + nullable: false + 'n': + description: |- + How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed. + Note: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. + type: integer + default: 1 + example: 1 + nullable: true + stream: + description: 'Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.' + type: boolean + nullable: true + default: false + logprobs: + description: |- + Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response. + Minimum of 0 and maximum of 5 allowed. + type: integer + default: null + nullable: true + suffix: + type: string + nullable: true + description: The suffix that comes after a completion of inserted text. + echo: + description: Echo back the prompt in addition to the completion + type: boolean + default: false + nullable: true + stop: + description: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. + oneOf: + - type: string + default: <|endoftext|> + example: |+ + + nullable: true + - type: array + items: + type: string + example: |+ + + nullable: false + description: Array minimum size of 1 and maximum of 4 + completion_config: + type: string + nullable: true + presence_penalty: + description: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. 
+ type: number + default: 0 + frequency_penalty: + description: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + type: number + default: 0 + best_of: + description: |- + Generates best_of completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed. + When used with n, best_of controls the number of candidate completions and n specifies how many to return - best_of must be greater than n. + Note: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has maximum value of 128. + type: integer + example: + prompt: |- + Negate the following sentence.The price for bubblegum increased on thursday. + + Negated Sentence: + max_tokens: 50 + responses: + '200': + description: OK + content: + application/json: + schema: + type: object + properties: + id: + type: string + object: + type: string + created: + type: integer + model: + type: string + prompt_filter_results: + $ref: '#/components/schemas/promptFilterResults' + choices: + type: array + items: + type: object + properties: + text: + type: string + index: + type: integer + logprobs: + type: object + properties: + tokens: + type: array + items: + type: string + token_logprobs: + type: array + items: + type: number + top_logprobs: + type: array + items: + type: object + additionalProperties: + type: number + text_offset: + type: array + items: + type: integer + nullable: true + finish_reason: + type: string + content_filter_results: + $ref: '#/components/schemas/contentFilterResults' + usage: + type: object + properties: + completion_tokens: + type: number + format: int32 + prompt_tokens: + type: number + format: int32 + total_tokens: + type: number + format: int32 + required: + - prompt_tokens + - total_tokens + - completion_tokens + required: + - id + - object + - created + - model + - choices + example: + model: davinci + object: text_completion + id: cmpl-4509KAos68kxOqpE2uYGw81j6m7uo + created: 1637097562 + choices: + - index: 0 + text: The price for bubblegum decreased on thursday. + logprobs: null + finish_reason: stop + headers: + apim-request-id: + description: Request ID for troubleshooting purposes + schema: + type: string + default: + description: Service unavailable + content: + application/json: + schema: + $ref: '#/components/schemas/errorResponse' + headers: + apim-request-id: + description: Request ID for troubleshooting purposes + schema: + type: string + /deployments/{deployment-id}/embeddings: + post: + summary: Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. + operationId: embeddings_create + parameters: + - in: path + name: deployment-id + required: true + schema: + type: string + example: ada-search-index-v1 + description: The deployment id of the model which was deployed. + - in: query + name: api-version + required: true + schema: + type: string + example: 2023-08-01-preview + description: api version + requestBody: + required: true + content: + application/json: + schema: + type: object + additionalProperties: true + properties: + input: + description: |- + Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. 
Each input must not exceed 2048 tokens in length.
+                    Unless you are embedding code, we suggest replacing newlines (\n) in your input with a single space, as we have observed inferior results when newlines are present.
+                  oneOf:
+                    - type: string
+                      default: ''
+                      example: This is a test.
+                      nullable: true
+                    - type: array
+                      minItems: 1
+                      maxItems: 2048
+                      items:
+                        type: string
+                        minLength: 1
+                        example: This is a test.
+                      nullable: false
+                user:
+                  description: A unique identifier representing your end-user, which can help monitor and detect abuse.
+                  type: string
+                  nullable: false
+                input_type:
+                  description: input type of embedding search to use
+                  type: string
+                  example: query
+              required:
+                - input
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  object:
+                    type: string
+                  model:
+                    type: string
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        index:
+                          type: integer
+                        object:
+                          type: string
+                        embedding:
+                          type: array
+                          items:
+                            type: number
+                      required:
+                        - index
+                        - object
+                        - embedding
+                  usage:
+                    type: object
+                    properties:
+                      prompt_tokens:
+                        type: integer
+                      total_tokens:
+                        type: integer
+                    required:
+                      - prompt_tokens
+                      - total_tokens
+                required:
+                  - object
+                  - model
+                  - data
+                  - usage
+  /deployments/{deployment-id}/chat/completions:
+    post:
+      summary: Creates a completion for the chat message
+      operationId: ChatCompletions_Create
+      parameters:
+        - in: path
+          name: deployment-id
+          required: true
+          schema:
+            type: string
+          description: Deployment id of the model which was deployed.
+        - in: query
+          name: api-version
+          required: true
+          schema:
+            type: string
+            example: 2023-08-01-preview
+          description: api version
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/createChatCompletionRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/createChatCompletionResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+        default:
+          description: Service unavailable
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/errorResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+  /deployments/{deployment-id}/extensions/chat/completions:
+    post:
+      summary: Using extensions to create a completion for the chat messages.
+      operationId: ExtensionsChatCompletions_Create
+      parameters:
+        - in: path
+          name: deployment-id
+          required: true
+          schema:
+            type: string
+          description: Deployment id of the model which was deployed.
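
For reference, the embeddings operation defined above can be invoked through the generated Ballerina client in the same way as the chat operations. The following is a minimal sketch; the deployment name `ada`, the configurables, and the `var` binding for the tool-generated inline request and response types are illustrative:

```ballerina
import ballerina/io;
import ballerinax/azure.openai.chat;

configurable string apiKey = ?;
configurable string serviceUrl = ?;

public function main() returns error? {
    final chat:Client chatClient = check new (
        config = {auth: {apiKey: apiKey}},
        serviceUrl = serviceUrl
    );

    // The payload follows the inline embeddings request schema above;
    // `input` accepts a single string or an array of strings.
    var embeddingsResult = check chatClient->/deployments/["ada"]/embeddings.post(
        "2023-08-01-preview",
        {input: "The food was delicious and the waiter was friendly."}
    );
    io:println(embeddingsResult);
}
```
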
+        - in: query
+          name: api-version
+          required: true
+          schema:
+            type: string
+            example: 2023-08-01-preview
+          description: api version
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/extensionsChatCompletionsRequest'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/extensionsChatCompletionsResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+        default:
+          description: Service unavailable
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/errorResponse'
+          headers:
+            apim-request-id:
+              description: Request ID for troubleshooting purposes
+              schema:
+                type: string
+components:
+  schemas:
+    errorResponse:
+      type: object
+      properties:
+        error:
+          $ref: '#/components/schemas/error'
+    errorBase:
+      type: object
+      properties:
+        code:
+          type: string
+        message:
+          type: string
+    error:
+      type: object
+      allOf:
+        - $ref: '#/components/schemas/errorBase'
+      properties:
+        code:
+          type: string
+        message:
+          type: string
+        param:
+          type: string
+        type:
+          type: string
+        inner_error:
+          $ref: '#/components/schemas/innerError'
+    innerError:
+      description: Inner error with additional details.
+      type: object
+      properties:
+        code:
+          $ref: '#/components/schemas/innerErrorCode'
+        content_filter_results:
+          $ref: '#/components/schemas/contentFilterResults'
+    innerErrorCode:
+      description: Error codes for the inner error object.
+      enum:
+        - ResponsibleAIPolicyViolation
+      type: string
+      x-ms-enum:
+        name: InnerErrorCode
+        modelAsString: true
+        values:
+          - value: ResponsibleAIPolicyViolation
+            description: The prompt violated one or more content filter rules.
+    contentFilterResult:
+      type: object
+      properties:
+        severity:
+          type: string
+          enum:
+            - safe
+            - low
+            - medium
+            - high
+          x-ms-enum:
+            name: ContentFilterSeverity
+            modelAsString: true
+            values:
+              - value: safe
+                description: General content or related content in generic or non-harmful contexts.
+              - value: low
+                description: Harmful content at a low intensity and risk level.
+              - value: medium
+                description: Harmful content at a medium intensity and risk level.
+              - value: high
+                description: Harmful content at a high intensity and risk level.
+        filtered:
+          type: boolean
+      required:
+        - severity
+        - filtered
+    contentFilterResults:
+      type: object
+      description: Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not.
+      properties:
+        sexual:
+          $ref: '#/components/schemas/contentFilterResult'
+        violence:
+          $ref: '#/components/schemas/contentFilterResult'
+        hate:
+          $ref: '#/components/schemas/contentFilterResult'
+        self_harm:
+          $ref: '#/components/schemas/contentFilterResult'
+        error:
+          $ref: '#/components/schemas/errorBase'
+    promptFilterResult:
+      type: object
+      description: Content filtering results for a single prompt in the request.
+      properties:
+        prompt_index:
+          type: integer
+        content_filter_results:
+          $ref: '#/components/schemas/contentFilterResults'
+    promptFilterResults:
+      type: array
+      description: Content filtering results for zero or more prompts in the request. In a streaming request, results for different prompts may arrive at different times or in different orders.
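
Callers can inspect these filter annotations on a chat completions response. The sketch below uses the generated Ballerina types from `types.bal` later in this patch; all of the filter fields are optional, so whether they are populated depends on the service's content filtering configuration:

```ballerina
import ballerina/io;
import ballerinax/azure.openai.chat;

// Prints the content filter verdicts attached to the first choice of a
// chat completions response, using optional field access throughout.
function printFilterVerdicts(chat:CreateChatCompletionResponse chatResult) {
    chat:ContentFilterResults? filterResults = chatResult.choices[0]?.content_filter_results;
    if filterResults is chat:ContentFilterResults {
        chat:ContentFilterResult? hate = filterResults?.hate;
        if hate is chat:ContentFilterResult {
            io:println("hate: severity=", hate.severity, ", filtered=", hate.filtered);
        }
        chat:ContentFilterResult? violence = filterResults?.violence;
        if violence is chat:ContentFilterResult {
            io:println("violence: severity=", violence.severity, ", filtered=", violence.filtered);
        }
    }
}
```
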
+ items: + $ref: '#/components/schemas/promptFilterResult' + chatCompletionsRequestCommon: + type: object + properties: + temperature: + description: |- + What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. + We generally recommend altering this or `top_p` but not both. + type: number + minimum: 0 + maximum: 2 + default: 1 + example: 1 + nullable: true + top_p: + description: |- + An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. + We generally recommend altering this or `temperature` but not both. + type: number + minimum: 0 + maximum: 1 + default: 1 + example: 1 + nullable: true + stream: + description: 'If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.' + type: boolean + nullable: true + default: false + stop: + description: Up to 4 sequences where the API will stop generating further tokens. + oneOf: + - type: string + nullable: true + - type: array + items: + type: string + nullable: false + minItems: 1 + maxItems: 4 + description: Array minimum size of 1 and maximum of 4 + default: null + max_tokens: + description: The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens). + type: integer + default: 4096 + presence_penalty: + description: Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + type: number + default: 0 + minimum: -2 + maximum: 2 + frequency_penalty: + description: Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. + type: number + default: 0 + minimum: -2 + maximum: 2 + logit_bias: + description: Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. + type: object + nullable: true + user: + description: A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse. + type: string + example: user-1234 + nullable: false + createChatCompletionRequest: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionsRequestCommon' + - properties: + messages: + description: A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + type: array + minItems: 1 + items: + $ref: '#/components/schemas/chatCompletionRequestMessage' + functions: + description: A list of functions the model may generate JSON inputs for. 
+              type: array
+              minItems: 1
+              items:
+                $ref: '#/components/schemas/chatCompletionFunctions'
+            function_call:
+              description: Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between responding to the end-user or calling a function. Specifying a particular function via `{"name":\ "my_function"}` forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present.
+              oneOf:
+                - type: string
+                  enum:
+                    - none
+                    - auto
+                - type: object
+                  properties:
+                    name:
+                      type: string
+                      description: The name of the function to call.
+                  required:
+                    - name
+            'n':
+              type: integer
+              minimum: 1
+              maximum: 128
+              default: 1
+              example: 1
+              nullable: true
+              description: How many chat completion choices to generate for each input message.
+          required:
+            - messages
+    chatCompletionFunctions:
+      type: object
+      properties:
+        name:
+          type: string
+          description: The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
+        description:
+          type: string
+          description: The description of what the function does.
+        parameters:
+          $ref: '#/components/schemas/chatCompletionFunctionParameters'
+      required:
+        - name
+    chatCompletionFunctionParameters:
+      type: object
+      description: The parameters the function accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
+      additionalProperties: true
+    chatCompletionRequestMessage:
+      type: object
+      properties:
+        role:
+          type: string
+          enum:
+            - system
+            - user
+            - assistant
+            - function
+          description: The role of the messages author. One of `system`, `user`, `assistant`, or `function`.
+        content:
+          type: string
+          description: The contents of the message. `content` is required for all messages except assistant messages with function calls.
+        name:
+          type: string
+          description: The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters.
+        function_call:
+          type: object
+          description: The name and arguments of a function that should be called, as generated by the model.
+          properties:
+            name:
+              type: string
+              description: The name of the function to call.
+            arguments:
+              type: string
+              description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
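
As the `arguments` description warns, the model-generated JSON should be parsed and validated before the named function is invoked. The following is a hedged Ballerina sketch of that step, where `GetWeatherArgs` and `get_current_weather` are hypothetical stand-ins for whatever functions the application registers:

```ballerina
import ballerina/io;
import ballerinax/azure.openai.chat;

// Hypothetical argument shape for a registered get_current_weather function.
type GetWeatherArgs record {
    string location;
    "celsius"|"fahrenheit" unit?;
};

// Parses the model-generated arguments and validates them against the
// expected shape before dispatching; returns an error for invalid JSON
// or a mismatched schema.
function dispatchFunctionCall(chat:ChatCompletionRequestMessage_function_call functionCall) returns error? {
    if functionCall?.name == "get_current_weather" {
        string arguments = functionCall?.arguments ?: "{}";
        GetWeatherArgs args = check arguments.fromJsonStringWithType();
        io:println("Looking up weather for ", args.location);
    }
}
```
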
+ required: + - role + createChatCompletionResponse: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionsResponseCommon' + - properties: + prompt_filter_results: + $ref: '#/components/schemas/promptFilterResults' + choices: + type: array + items: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionChoiceCommon' + - properties: + message: + $ref: '#/components/schemas/chatCompletionResponseMessage' + content_filter_results: + $ref: '#/components/schemas/contentFilterResults' + required: + - id + - object + - created + - model + - choices + chatCompletionResponseMessage: + type: object + properties: + role: + type: string + enum: + - system + - user + - assistant + - function + description: The role of the author of this message. + content: + type: string + description: The contents of the message. + function_call: + type: object + description: The name and arguments of a function that should be called, as generated by the model. + properties: + name: + type: string + description: The name of the function to call. + arguments: + type: string + description: The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + required: + - role + extensionsChatCompletionsRequest: + type: object + description: Request for the chat completions using extensions + required: + - messages + allOf: + - $ref: '#/components/schemas/chatCompletionsRequestCommon' + - properties: + messages: + type: array + items: + $ref: '#/components/schemas/message' + dataSources: + type: array + description: The data sources to be used for the Azure OpenAI on your data feature. + items: + $ref: '#/components/schemas/dataSource' + example: + dataSources: + - type: AzureCognitiveSearch + parameters: + endpoint: https://mysearchexample.search.windows.net + key: '***(admin key)' + indexName: my-chunk-index + fieldsMapping: + titleField: productName + urlField: productUrl + filepathField: productFilePath + contentFields: + - productDescription + contentFieldsSeparator: |+ + + topNDocuments: 5 + queryType: semantic + semanticConfiguration: defaultConfiguration + inScope: true + roleInformation: roleInformation + messages: + - role: user + content: Where can I find a hiking place in Seattle? + temperature: 0.9 + dataSource: + type: object + description: The data source to be used for the Azure OpenAI on your data feature. + properties: + type: + type: string + description: The data source type. + parameters: + type: object + description: The parameters to be used for the data source in runtime. + additionalProperties: true + required: + - type + message: + type: object + description: A chat message. + properties: + index: + type: integer + description: The index of the message in the conversation. + role: + type: string + enum: + - system + - user + - assistant + - tool + description: The role of the author of this message. + recipient: + type: string + example: Contoso.productsUsingGET + description: The recipient of the message in the format of .. Present if and only if the recipient is tool. + content: + type: string + description: The contents of the message + end_turn: + type: boolean + description: Whether the message ends the turn. 
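
The `dataSources` example above maps directly onto the generated `ExtensionsChatCompletionsRequest` and `DataSource` Ballerina types. The following is a minimal sketch of the same call; the deployment name, index name, resource path, and configurables are illustrative:

```ballerina
import ballerina/io;
import ballerinax/azure.openai.chat;

configurable string apiKey = ?;
configurable string serviceUrl = ?;
configurable string searchEndpoint = ?;
configurable string searchKey = ?;

public function main() returns error? {
    final chat:Client chatClient = check new (
        config = {auth: {apiKey: apiKey}},
        serviceUrl = serviceUrl
    );

    // An Azure Cognitive Search index as the data source, following the
    // dataSource schema above; `parameters` is an open record.
    chat:ExtensionsChatCompletionsRequest request = {
        messages: [{role: "user", content: "Where can I find a hiking place in Seattle?"}],
        dataSources: [
            {
                'type: "AzureCognitiveSearch",
                parameters: {
                    "endpoint": searchEndpoint,
                    "key": searchKey,
                    "indexName": "my-chunk-index"
                }
            }
        ]
    };

    chat:ExtensionsChatCompletionsResponse response =
        check chatClient->/deployments/["chat"]/extensions/chat/completions.post("2023-08-01-preview", request);
    io:println(response);
}
```
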
+ context: + type: object + description: The conversation context + nullable: true + properties: + messages: + type: array + description: Messages exchanged between model and extensions prior to final message from model + minItems: 1 + items: + $ref: '#/components/schemas/message' + nullable: true + required: + - role + - content + chatCompletionsResponseCommon: + type: object + properties: + id: + type: string + object: + type: string + created: + type: integer + format: unixtime + model: + type: string + usage: + type: object + properties: + prompt_tokens: + type: integer + completion_tokens: + type: integer + total_tokens: + type: integer + required: + - prompt_tokens + - completion_tokens + - total_tokens + required: + - id + - object + - created + - model + chatCompletionChoiceCommon: + type: object + properties: + index: + type: integer + finish_reason: + type: string + extensionsChatCompletionChoice: + type: object + allOf: + - $ref: '#/components/schemas/chatCompletionChoiceCommon' + - properties: + message: + description: The message returned by the service. + $ref: '#/components/schemas/message' + extensionsChatCompletionsResponse: + type: object + description: The response of the extensions chat completions. + allOf: + - $ref: '#/components/schemas/chatCompletionsResponseCommon' + - properties: + choices: + type: array + items: + $ref: '#/components/schemas/extensionsChatCompletionChoice' + example: + id: '1' + object: extensions.chat.completion + created: 1679201802 + model: gpt-3.5-turbo-0301 + choices: + - index: 0 + finish_reason: stop + message: + role: assistant + content: Seattle is a great place for hiking! Here are some of the best hiking places in Seattle according to Contoso Traveler [doc1] and West Coast Traveler, Snow Lake, Mount Si, and Mount Tenerife [doc2]. I hope this helps! Let me know if you need more information. + end_turn: true + context: + messages: + - role: tool + content: '{"citations":[{"filepath":"ContosoTraveler.pdf","content":"This is the content of the citation 1"},{"filepath":"WestCoastTraveler.html","content":"This is the content of the citation 2"},{"content":"This is the content of the citation 3 without filepath"}],"intent":"hiking place in seattle"}' + end_turn: false + securitySchemes: + bearer: + type: oauth2 + flows: + implicit: + authorizationUrl: https://login.microsoftonline.com/common/oauth2/v2.0/authorize + scopes: {} + x-tokenInfoFunc: api.middleware.auth.bearer_auth + x-scopeValidateFunc: api.middleware.auth.validate_scopes + apiKey: + type: apiKey + name: api-key + in: header diff --git a/openapi/azure.openai.chat/types.bal b/openapi/azure.openai.chat/types.bal index 30d82b9b2..ac26dce06 100644 --- a/openapi/azure.openai.chat/types.bal +++ b/openapi/azure.openai.chat/types.bal @@ -1,3 +1,6 @@ +// AUTO-GENERATED FILE. DO NOT MODIFY. +// This file is auto-generated by the Ballerina OpenAPI tool. + import ballerina/http; import ballerina/constraint; @@ -7,7 +10,7 @@ public type ConnectionConfig record {| # Provides Auth configurations needed when communicating with a remote HTTP endpoint. http:BearerTokenConfig|ApiKeysConfig auth; # The HTTP version understood by the client - http:HttpVersion httpVersion = http:HTTP_1_1; + http:HttpVersion httpVersion = http:HTTP_2_0; # Configurations related to HTTP/1.x protocol ClientHttp1Settings http1Settings?; # Configurations related to HTTP/2 protocol @@ -66,80 +69,224 @@ public type ApiKeysConfig record {| string apiKey; |}; -public type Inline_response_200_message record { +# A chat message. 
+public type Message record { + # The index of the message in the conversation. + int index?; # The role of the author of this message. - string role; + "system"|"user"|"assistant"|"tool" role; + # The recipient of the message in the format of .. Present if and only if the recipient is tool. + string recipient?; # The contents of the message string content; + # Whether the message ends the turn. + boolean end_turn?; + # The conversation context + Message_context? context?; +}; + +public type ChatCompletionsResponseCommon_usage record { + int prompt_tokens; + int completion_tokens; + int total_tokens; +}; + +# Inner error with additional details. +public type InnerError record { + # Error codes for the inner error object. + InnerErrorCode code?; + # Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + ContentFilterResults content_filter_results?; }; -public type Chat_completions_body record { - # The messages to generate chat completions for, in the chat format. +public type ChatCompletionRequestMessage record { + # The role of the messages author. One of `system`, `user`, `assistant`, or `function`. + "system"|"user"|"assistant"|"function" role; + # The contents of the message. `content` is required for all messages except assistant messages with function calls. + string? content; + # The name of the author of this message. `name` is required if role is `function`, and it should be the name of the function whose response is in the `content`. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters. + string name?; + # The name and arguments of a function that should be called, as generated by the model. + ChatCompletionRequestMessage_function_call function_call?; +}; + +public type ChatCompletionChoiceCommon record { + int index?; + string finish_reason?; +}; + +public type CreateChatCompletionRequest record { + *ChatCompletionsRequestCommon; + # A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). @constraint:Array {minLength: 1} - Deploymentsdeploymentidchatcompletions_messages[] messages; + ChatCompletionRequestMessage[] messages; + # A list of functions the model may generate JSON inputs for. + @constraint:Array {minLength: 1} + ChatCompletionFunctions[] functions?; + # Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via `{"name":\ "my_function"}` forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. + "none"|"auto"|record { + # The name of the function to call. + string name; + } function_call?; + # How many chat completion choices to generate for each input message. + int? 
n = 1; +}; + +public type ChatCompletionsResponseCommon record { + string id; + string 'object; + int created; + string model; + ChatCompletionsResponseCommon_usage usage?; +}; + +# Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. +public type ContentFilterResults record { + # Information about the content filtering category including the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + ContentFilterResult sexual?; + # Information about the content filtering category including the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + ContentFilterResult violence?; + # Information about the content filtering category including the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + ContentFilterResult hate?; + # Information about the content filtering category including the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + ContentFilterResult self_harm?; + # Error information returned by the service. + ErrorBase 'error?; +}; + +# Error information returned by the service. +public type ErrorBase record { + # The error code. + string code?; + # The error message. + string message?; +}; + +public type ChatCompletionsRequestCommon record { # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. # We generally recommend altering this or `top_p` but not both. - decimal? temperature = 1; + decimal? temperature?; # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. # We generally recommend altering this or `temperature` but not both. - decimal? top_p = 1; - # How many chat completion choices to generate for each input message. - int? n = 1; + decimal? top_p?; # If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. - boolean? 'stream = false; + boolean? 'stream?; # Up to 4 sequences where the API will stop generating further tokens. - string|string[]? stop = "null"; + string|string[]? stop?; # The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens). int max_tokens?; # Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. @constraint:Number {minValue: -2, maxValue: 2} - decimal presence_penalty = 0; + decimal presence_penalty?; # Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. 
@constraint:Number {minValue: -2, maxValue: 2} - decimal frequency_penalty = 0; + decimal frequency_penalty?; # Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. record {}? logit_bias?; # A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse. string user?; }; -public type ErrorResponse_error record { - string code?; - string message?; - string param?; - string 'type?; +# The name and arguments of a function that should be called, as generated by the model. +public type ChatCompletionRequestMessage_function_call record { + # The name of the function to call. + string name?; + # The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + string arguments?; }; -public type Inline_response_200_usage record { - int prompt_tokens; - int completion_tokens; - int total_tokens; +# The response of the extensions chat completions. +public type ExtensionsChatCompletionsResponse record { + *ChatCompletionsResponseCommon; + # A list of chat completion choices. + ExtensionsChatCompletionChoice[] choices?; }; -public type Inline_response_200_choices record { - int index?; - Inline_response_200_message message?; - string finish_reason?; +# Error codes for the inner error object. +public type InnerErrorCode "ResponsibleAIPolicyViolation"; + +# Content filtering results for a single prompt in the request. +public type PromptFilterResult record { + # The index of the prompt in the set of prompt results. + int prompt_index?; + # Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. + ContentFilterResults content_filter_results?; }; -public type Deploymentsdeploymentidchatcompletions_messages record { - # The role of the author of this message. - string role; - # The contents of the message - string content; - # The name of the user in a multi-user chat - string name?; +# Content filtering results for zero or more prompts in the request. In a streaming request, results for different prompts may arrive at different times or in different orders. +public type PromptFilterResults PromptFilterResult[]; + +public type CreateChatCompletionResponse record { + *ChatCompletionsResponseCommon; + record { + *ChatCompletionChoiceCommon; + ChatCompletionResponseMessage message?; + # Information about the content filtering category (hate, sexual, violence, self_harm), if it has been detected, as well as the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not. 
+        ContentFilterResults content_filter_results?;
+    }[] choices;
};
-public type Inline_response_200 record {
-    string id;
-    string 'object;
-    int created;
-    string model;
-    Inline_response_200_choices[] choices;
-    Inline_response_200_usage usage?;
+public type Error ErrorBase;
+
+public type ChatCompletionFunctions record {
+    # The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
+    string name;
+    # The description of what the function does.
+    string description?;
+    # The parameters the function accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
+    ChatCompletionFunctionParameters parameters?;
};

public type ErrorResponse record {
-    ErrorResponse_error 'error?;
+    Error 'error?;
+};
+
+# The data source to be used for the Azure OpenAI on your data feature.
+public type DataSource record {
+    # The data source type.
+    string 'type;
+    # The parameters to be used for the data source in runtime.
+    record {} parameters?;
+};
+
+public type ChatCompletionResponseMessage record {
+    # The role of the author of this message.
+    "system"|"user"|"assistant"|"function" role;
+    # The contents of the message.
+    string content?;
+    # The name and arguments of a function that should be called, as generated by the model.
+    ChatCompletionRequestMessage_function_call function_call?;
+};
+
+# The conversation context
+public type Message_context record {
+    # Messages exchanged between model and extensions prior to final message from model
+    Message[]? messages?;
+};
+
+# Information about the content filtering category including the severity level (very_low, low, medium, high-scale that determines the intensity and risk level of harmful content) and if it has been filtered or not.
+public type ContentFilterResult record {
+    # The severity level of the content filter result.
+    "safe"|"low"|"medium"|"high" severity;
+    # Whether the content filter result has been filtered or not.
+    boolean filtered;
+};
+
+# Request for the chat completions using extensions
+public type ExtensionsChatCompletionsRequest record {
+    *ChatCompletionsRequestCommon;
+    # A list of messages comprising the conversation so far. [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb).
+    Message[] messages;
+    # The data sources to be used for the Azure OpenAI on your data feature.
+    DataSource[] dataSources?;
+};
+
+public type ExtensionsChatCompletionChoice record {
+    *ChatCompletionChoiceCommon;
+    # A chat message.
+    Message message?;
+};
+
+# The parameters the function accepts, described as a JSON Schema object. See the [guide](/docs/guides/gpt/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
+public type ChatCompletionFunctionParameters record {
};
diff --git a/openapi/azure.openai.chat/utils.bal b/openapi/azure.openai.chat/utils.bal
index 5a860b4d7..29d80caaa 100644
--- a/openapi/azure.openai.chat/utils.bal
+++ b/openapi/azure.openai.chat/utils.bal
@@ -1,3 +1,6 @@
+// AUTO-GENERATED FILE. DO NOT MODIFY.
+// This file is auto-generated by the Ballerina OpenAPI tool.
+
import ballerina/url;

type SimpleBasicType string|boolean|int|float|decimal;
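
Putting the new types together, a caller can read both the assistant's answer and the tool citations carried in the message context of an extensions chat completions response. The following is a sketch; every field used below except `role` and `content` is optional, so what is present depends on the configured data source:

```ballerina
import ballerina/io;
import ballerinax/azure.openai.chat;

// Walks the choices of an extensions chat completions response and prints
// each message along with any context messages (for example, the tool
// message carrying the citations JSON shown in the example above).
function printAnswerWithCitations(chat:ExtensionsChatCompletionsResponse response) {
    chat:ExtensionsChatCompletionChoice[] choices = response?.choices ?: [];
    foreach chat:ExtensionsChatCompletionChoice choice in choices {
        chat:Message? message = choice?.message;
        if message is chat:Message {
            io:println(message.role, ": ", message.content);
            chat:Message[]? contextMessages = message?.context?.messages;
            if contextMessages is chat:Message[] {
                foreach chat:Message contextMessage in contextMessages {
                    io:println("context (", contextMessage.role, "): ", contextMessage.content);
                }
            }
        }
    }
}
```

The citations themselves arrive as a JSON string in the tool message's `content`, so an application that needs them in structured form can parse that string further, for example with `fromJsonStringWithType`.
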