Skip to content

Commit

Permalink
feat(api): add gpt-4o-audio-preview model for chat completions (#1135)
Browse files Browse the repository at this point in the history
This enables audio inputs and outputs. https://platform.openai.com/docs/guides/audio
  • Loading branch information
Stainless Bot authored and RobertCraigie committed Oct 17, 2024
1 parent 3c32662 commit 17a623f
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .stats.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
configured_endpoints: 68
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-71e58a77027c67e003fdd1b1ac8ac11557d8bfabc7666d1a827c6b1ca8ab98b5.yml
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8729aaa35436531ab453224af10e67f89677db8f350f0346bb3537489edea649.yml
4 changes: 4 additions & 0 deletions api.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,20 @@ Types:

- <code><a href="./src/resources/chat/completions.ts">ChatCompletion</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionAssistantMessageParam</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionAudio</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionAudioParam</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionChunk</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPart</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartImage</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartInputAudio</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartRefusal</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionContentPartText</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionFunctionCallOption</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionFunctionMessageParam</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionMessage</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionMessageParam</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionMessageToolCall</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionModality</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionNamedToolChoice</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionRole</a></code>
- <code><a href="./src/resources/chat/completions.ts">ChatCompletionStreamOptions</a></code>
Expand Down
4 changes: 4 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,20 @@ export namespace OpenAI {
export import ChatModel = API.ChatModel;
export import ChatCompletion = API.ChatCompletion;
export import ChatCompletionAssistantMessageParam = API.ChatCompletionAssistantMessageParam;
export import ChatCompletionAudio = API.ChatCompletionAudio;
export import ChatCompletionAudioParam = API.ChatCompletionAudioParam;
export import ChatCompletionChunk = API.ChatCompletionChunk;
export import ChatCompletionContentPart = API.ChatCompletionContentPart;
export import ChatCompletionContentPartImage = API.ChatCompletionContentPartImage;
export import ChatCompletionContentPartInputAudio = API.ChatCompletionContentPartInputAudio;
export import ChatCompletionContentPartRefusal = API.ChatCompletionContentPartRefusal;
export import ChatCompletionContentPartText = API.ChatCompletionContentPartText;
export import ChatCompletionFunctionCallOption = API.ChatCompletionFunctionCallOption;
export import ChatCompletionFunctionMessageParam = API.ChatCompletionFunctionMessageParam;
export import ChatCompletionMessage = API.ChatCompletionMessage;
export import ChatCompletionMessageParam = API.ChatCompletionMessageParam;
export import ChatCompletionMessageToolCall = API.ChatCompletionMessageToolCall;
export import ChatCompletionModality = API.ChatCompletionModality;
export import ChatCompletionNamedToolChoice = API.ChatCompletionNamedToolChoice;
export import ChatCompletionRole = API.ChatCompletionRole;
export import ChatCompletionStreamOptions = API.ChatCompletionStreamOptions;
Expand Down
4 changes: 3 additions & 1 deletion src/lib/AbstractChatCompletionRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ export class AbstractChatCompletionRunner<
const message = this.messages[i];
if (isAssistantMessage(message)) {
const { function_call, ...rest } = message;
const ret: ChatCompletionMessage = {

// TODO: support audio here
const ret: Omit<ChatCompletionMessage, 'audio'> = {
...rest,
content: (message as ChatCompletionMessage).content ?? null,
refusal: (message as ChatCompletionMessage).refusal ?? null,
Expand Down
10 changes: 10 additions & 0 deletions src/resources/beta/assistants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,11 @@ export namespace AssistantStreamEvent {
data: ThreadsAPI.Thread;

event: 'thread.created';

/**
* Whether to enable input audio transcription.
*/
enabled?: boolean;
}

/**
Expand Down Expand Up @@ -1084,6 +1089,11 @@ export interface ThreadStreamEvent {
data: ThreadsAPI.Thread;

event: 'thread.created';

/**
* Whether to enable input audio transcription.
*/
enabled?: boolean;
}

export interface AssistantCreateParams {
Expand Down
7 changes: 7 additions & 0 deletions src/resources/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ export type ChatModel =
| 'gpt-4o'
| 'gpt-4o-2024-08-06'
| 'gpt-4o-2024-05-13'
| 'gpt-4o-realtime-preview'
| 'gpt-4o-realtime-preview-2024-10-01'
| 'gpt-4o-audio-preview'
| 'gpt-4o-audio-preview-2024-10-01'
| 'chatgpt-4o-latest'
| 'gpt-4o-mini'
| 'gpt-4o-mini-2024-07-18'
Expand Down Expand Up @@ -45,16 +48,20 @@ export namespace Chat {
export import Completions = CompletionsAPI.Completions;
export import ChatCompletion = CompletionsAPI.ChatCompletion;
export import ChatCompletionAssistantMessageParam = CompletionsAPI.ChatCompletionAssistantMessageParam;
export import ChatCompletionAudio = CompletionsAPI.ChatCompletionAudio;
export import ChatCompletionAudioParam = CompletionsAPI.ChatCompletionAudioParam;
export import ChatCompletionChunk = CompletionsAPI.ChatCompletionChunk;
export import ChatCompletionContentPart = CompletionsAPI.ChatCompletionContentPart;
export import ChatCompletionContentPartImage = CompletionsAPI.ChatCompletionContentPartImage;
export import ChatCompletionContentPartInputAudio = CompletionsAPI.ChatCompletionContentPartInputAudio;
export import ChatCompletionContentPartRefusal = CompletionsAPI.ChatCompletionContentPartRefusal;
export import ChatCompletionContentPartText = CompletionsAPI.ChatCompletionContentPartText;
export import ChatCompletionFunctionCallOption = CompletionsAPI.ChatCompletionFunctionCallOption;
export import ChatCompletionFunctionMessageParam = CompletionsAPI.ChatCompletionFunctionMessageParam;
export import ChatCompletionMessage = CompletionsAPI.ChatCompletionMessage;
export import ChatCompletionMessageParam = CompletionsAPI.ChatCompletionMessageParam;
export import ChatCompletionMessageToolCall = CompletionsAPI.ChatCompletionMessageToolCall;
export import ChatCompletionModality = CompletionsAPI.ChatCompletionModality;
export import ChatCompletionNamedToolChoice = CompletionsAPI.ChatCompletionNamedToolChoice;
export import ChatCompletionRole = CompletionsAPI.ChatCompletionRole;
export import ChatCompletionStreamOptions = CompletionsAPI.ChatCompletionStreamOptions;
Expand Down
153 changes: 148 additions & 5 deletions src/resources/chat/completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ import { Stream } from '../../streaming';

export class Completions extends APIResource {
/**
* Creates a model response for the given chat conversation.
* Creates a model response for the given chat conversation. Learn more in the
* [text generation](https://platform.openai.com/docs/guides/text-generation),
* [vision](https://platform.openai.com/docs/guides/vision), and
* [audio](https://platform.openai.com/docs/guides/audio) guides.
*/
create(
body: ChatCompletionCreateParamsNonStreaming,
Expand Down Expand Up @@ -138,6 +141,12 @@ export interface ChatCompletionAssistantMessageParam {
*/
role: 'assistant';

/**
* Data about a previous audio response from the model.
* [Learn more](https://platform.openai.com/docs/guides/audio).
*/
audio?: ChatCompletionAssistantMessageParam.Audio | null;

/**
* The contents of the assistant message. Required unless `tool_calls` or
* `function_call` is specified.
Expand Down Expand Up @@ -168,6 +177,17 @@ export interface ChatCompletionAssistantMessageParam {
}

export namespace ChatCompletionAssistantMessageParam {
/**
* Data about a previous audio response from the model.
* [Learn more](https://platform.openai.com/docs/guides/audio).
*/
export interface Audio {
/**
* Unique identifier for a previous audio response from the model.
*/
id: string;
}

/**
* @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of
* a function that should be called, as generated by the model.
Expand All @@ -188,6 +208,54 @@ export namespace ChatCompletionAssistantMessageParam {
}
}

/**
 * Audio payload carried on a model response when the audio output modality was
 * requested. See the [audio guide](https://platform.openai.com/docs/guides/audio).
 */
export interface ChatCompletionAudio {
  /**
   * Unique identifier of this audio response.
   */
  id: string;

  /**
   * The audio bytes generated by the model, Base64 encoded, in the format that
   * was specified in the request.
   */
  data: string;

  /**
   * Unix timestamp (in seconds) after which this audio response is no longer
   * accessible on the server for use in multi-turn conversations.
   */
  expires_at: number;

  /**
   * Transcript of the audio the model generated.
   */
  transcript: string;
}

/**
 * Audio output settings for a request. Required when audio output is requested
 * with `modalities: ["audio"]`.
 * See the [audio guide](https://platform.openai.com/docs/guides/audio).
 */
export interface ChatCompletionAudioParam {
  /**
   * Output audio format: one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
   */
  format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';

  /**
   * Voice type to use: one of `alloy`, `echo`, `fable`, `onyx`, `nova`, or
   * `shimmer`.
   */
  voice: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
}

/**
* Represents a streamed chunk of a chat completion response returned by model,
* based on the provided input.
Expand Down Expand Up @@ -371,8 +439,18 @@ export namespace ChatCompletionChunk {
}
}

export type ChatCompletionContentPart = ChatCompletionContentPartText | ChatCompletionContentPartImage;
/**
 * A single content part: text, an image, or input audio. Learn about
 * [text inputs](https://platform.openai.com/docs/guides/text-generation),
 * [image inputs](https://platform.openai.com/docs/guides/vision), and
 * [audio inputs](https://platform.openai.com/docs/guides/audio).
 */
export type ChatCompletionContentPart =
  | ChatCompletionContentPartText
  | ChatCompletionContentPartImage
  | ChatCompletionContentPartInputAudio;

/**
* Learn about [image inputs](https://platform.openai.com/docs/guides/vision).
*/
export interface ChatCompletionContentPartImage {
image_url: ChatCompletionContentPartImage.ImageURL;

Expand All @@ -397,6 +475,32 @@ export namespace ChatCompletionContentPartImage {
}
}

/**
 * Content part that carries audio input. See the guide on
 * [audio inputs](https://platform.openai.com/docs/guides/audio).
 */
export interface ChatCompletionContentPartInputAudio {
  /**
   * Discriminator for this content part. Always `input_audio`.
   */
  type: 'input_audio';

  /**
   * The encoded audio and its format.
   */
  input_audio: ChatCompletionContentPartInputAudio.InputAudio;
}

export namespace ChatCompletionContentPartInputAudio {
  /**
   * Encoded audio payload of an `input_audio` content part.
   */
  export interface InputAudio {
    /**
     * The audio data, Base64 encoded.
     */
    data: string;

    /**
     * Format of the encoded audio data. Currently supports "wav" and "mp3".
     */
    format: 'wav' | 'mp3';
  }
}

export interface ChatCompletionContentPartRefusal {
/**
* The refusal message generated by the model.
Expand All @@ -409,6 +513,10 @@ export interface ChatCompletionContentPartRefusal {
type: 'refusal';
}

/**
* Learn about
* [text inputs](https://platform.openai.com/docs/guides/text-generation).
*/
export interface ChatCompletionContentPartText {
/**
* The text content.
Expand Down Expand Up @@ -471,6 +579,13 @@ export interface ChatCompletionMessage {
*/
role: 'assistant';

/**
* If the audio output modality is requested, this object contains data about the
* audio response from the model.
* [Learn more](https://platform.openai.com/docs/guides/audio).
*/
audio?: ChatCompletionAudio | null;

/**
* @deprecated: Deprecated and replaced by `tool_calls`. The name and arguments of
* a function that should be called, as generated by the model.
Expand Down Expand Up @@ -548,6 +663,8 @@ export namespace ChatCompletionMessageToolCall {
}
}

/**
 * An output type the model can be asked to generate for a request, as listed in
 * the `modalities` request parameter: `text` or `audio`.
 * [Learn more](https://platform.openai.com/docs/guides/audio).
 */
export type ChatCompletionModality = 'text' | 'audio';

/**
* Specifies a tool the model should use. Use to force the model to call a specific
* function.
Expand Down Expand Up @@ -743,6 +860,13 @@ export interface ChatCompletionCreateParamsBase {
*/
model: (string & {}) | ChatAPI.ChatModel;

/**
* Parameters for audio output. Required when audio output is requested with
* `modalities: ["audio"]`.
* [Learn more](https://platform.openai.com/docs/guides/audio).
*/
audio?: ChatCompletionAudioParam | null;

/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their
* existing frequency in the text so far, decreasing the model's likelihood to
Expand Down Expand Up @@ -812,10 +936,24 @@ export interface ChatCompletionCreateParamsBase {

/**
* Developer-defined tags and values used for filtering completions in the
* [dashboard](https://platform.openai.com/completions).
* [dashboard](https://platform.openai.com/chat-completions).
*/
metadata?: Record<string, string> | null;

/**
* Output types that you would like the model to generate for this request. Most
* models are capable of generating text, which is the default:
*
* `["text"]`
*
* The `gpt-4o-audio-preview` model can also be used to
* [generate audio](https://platform.openai.com/docs/guides/audio). To request that
* this model generate both text and audio responses, you can use:
*
* `["text", "audio"]`
*/
modalities?: Array<ChatCompletionModality> | null;

/**
* How many chat completion choices to generate for each input message. Note that
* you will be charged based on the number of generated tokens across all of the
Expand Down Expand Up @@ -900,8 +1038,9 @@ export interface ChatCompletionCreateParamsBase {
stop?: string | null | Array<string>;

/**
* Whether or not to store the output of this completion request for traffic
* logging in the [dashboard](https://platform.openai.com/completions).
* Whether or not to store the output of this chat completion request for use in
* our [model distillation](https://platform.openai.com/docs/guides/distillation)
* or [evals](https://platform.openai.com/docs/guides/evals) products.
*/
store?: boolean | null;

Expand Down Expand Up @@ -1049,16 +1188,20 @@ export type CompletionCreateParamsStreaming = ChatCompletionCreateParamsStreamin
export namespace Completions {
export import ChatCompletion = ChatCompletionsAPI.ChatCompletion;
export import ChatCompletionAssistantMessageParam = ChatCompletionsAPI.ChatCompletionAssistantMessageParam;
export import ChatCompletionAudio = ChatCompletionsAPI.ChatCompletionAudio;
export import ChatCompletionAudioParam = ChatCompletionsAPI.ChatCompletionAudioParam;
export import ChatCompletionChunk = ChatCompletionsAPI.ChatCompletionChunk;
export import ChatCompletionContentPart = ChatCompletionsAPI.ChatCompletionContentPart;
export import ChatCompletionContentPartImage = ChatCompletionsAPI.ChatCompletionContentPartImage;
export import ChatCompletionContentPartInputAudio = ChatCompletionsAPI.ChatCompletionContentPartInputAudio;
export import ChatCompletionContentPartRefusal = ChatCompletionsAPI.ChatCompletionContentPartRefusal;
export import ChatCompletionContentPartText = ChatCompletionsAPI.ChatCompletionContentPartText;
export import ChatCompletionFunctionCallOption = ChatCompletionsAPI.ChatCompletionFunctionCallOption;
export import ChatCompletionFunctionMessageParam = ChatCompletionsAPI.ChatCompletionFunctionMessageParam;
export import ChatCompletionMessage = ChatCompletionsAPI.ChatCompletionMessage;
export import ChatCompletionMessageParam = ChatCompletionsAPI.ChatCompletionMessageParam;
export import ChatCompletionMessageToolCall = ChatCompletionsAPI.ChatCompletionMessageToolCall;
export import ChatCompletionModality = ChatCompletionsAPI.ChatCompletionModality;
export import ChatCompletionNamedToolChoice = ChatCompletionsAPI.ChatCompletionNamedToolChoice;
export import ChatCompletionRole = ChatCompletionsAPI.ChatCompletionRole;
export import ChatCompletionStreamOptions = ChatCompletionsAPI.ChatCompletionStreamOptions;
Expand Down
4 changes: 4 additions & 0 deletions src/resources/chat/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
export {
ChatCompletion,
ChatCompletionAssistantMessageParam,
ChatCompletionAudio,
ChatCompletionAudioParam,
ChatCompletionChunk,
ChatCompletionContentPart,
ChatCompletionContentPartImage,
ChatCompletionContentPartInputAudio,
ChatCompletionContentPartRefusal,
ChatCompletionContentPartText,
ChatCompletionFunctionCallOption,
ChatCompletionFunctionMessageParam,
ChatCompletionMessage,
ChatCompletionMessageParam,
ChatCompletionMessageToolCall,
ChatCompletionModality,
ChatCompletionNamedToolChoice,
ChatCompletionRole,
ChatCompletionStreamOptions,
Expand Down
Loading

0 comments on commit 17a623f

Please sign in to comment.