forked from Azure/azure-sdk-for-net
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Azure OpenAI: audio transcription and translation (Azure#38460)
* squashed commit: whisper transcription/translation support * Update to latest TypeSpec * rebase codegen to mvp tsp PR * Merge, snap, suppression cleanup * PR feedback: remove errant <auto-generated/> tags in /custom * Test refresh, incl. temporary BYOD rollback * test fix for recordings and omitting multipart audio bodies * PR feedback: idiomatic response formats * PR feedback: fully distinguish translation and transcription types * PR feedback: keep well-(enough)-known names for Srt,Vtt * test: revert accidentally included local swap to live test mode * full test update, including RAI adjustments * merged .tsp snap (regen pending) * code regen after merge and tsp snap * CHANGELOG update * Incorporate fabulous PR feedback. Thank you, Jose!
- Loading branch information
Showing
79 changed files
with
2,709 additions
and
409 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
121 changes: 119 additions & 2 deletions
121
sdk/openai/Azure.AI.OpenAI/api/Azure.AI.OpenAI.netstandard2.0.cs
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
sdk/openai/Azure.AI.OpenAI/src/Custom.Suppressions/OpenAIClient.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System.Threading; | ||
using Azure.Core; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    // Each CodeGenSuppress attribute below names an OpenAIClient member by method name and parameter types. | ||
    // Presumably the code generator omits the matching generated members so that hand-written versions can | ||
    // replace them -- confirm against the generator's CodeGenSuppress documentation. | ||
    // The class body is empty: this partial declaration only carries the attributes. | ||
    [CodeGenSuppress("GetCompletions", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetCompletionsAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetChatCompletions", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetChatCompletionsAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetEmbeddings", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetEmbeddingsAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetChatCompletionsWithAzureExtensions", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetChatCompletionsWithAzureExtensions", typeof(string), typeof(ChatCompletionsOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetChatCompletionsWithAzureExtensionsAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetChatCompletionsWithAzureExtensionsAsync", typeof(string), typeof(ChatCompletionsOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsPlainText", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsPlainText", typeof(string), typeof(AudioTranscriptionOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsPlainTextAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsPlainTextAsync", typeof(string), typeof(AudioTranscriptionOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsResponseObject", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsResponseObject", typeof(string), typeof(AudioTranscriptionOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsResponseObjectAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranscriptionAsResponseObjectAsync", typeof(string), typeof(AudioTranscriptionOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranslationAsPlainText", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranslationAsPlainText", typeof(string), typeof(AudioTranslationOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranslationAsPlainTextAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranslationAsPlainTextAsync", typeof(string), typeof(AudioTranslationOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranslationAsResponseObject", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranslationAsResponseObject", typeof(string), typeof(AudioTranslationOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("GetAudioTranslationAsResponseObjectAsync", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("GetAudioTranslationAsResponseObjectAsync", typeof(string), typeof(AudioTranslationOptions), typeof(CancellationToken))] | ||
    [CodeGenSuppress("CreateGetCompletionsRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetChatCompletionsRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetEmbeddingsRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetChatCompletionsWithAzureExtensionsRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetAudioTranscriptionAsPlainTextRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetAudioTranscriptionAsResponseObjectRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetAudioTranslationAsPlainTextRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    [CodeGenSuppress("CreateGetAudioTranslationAsResponseObjectRequest", typeof(string), typeof(RequestContent), typeof(RequestContext))] | ||
    public partial class OpenAIClient | ||
    { | ||
    } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    // Custom partial declaration of the AudioTaskLabel struct. It adds no members; it only | ||
    // declares the struct with internal visibility. | ||
    internal readonly partial struct AudioTaskLabel | ||
    { | ||
        // CUSTOM CODE NOTE: this declaration is here to demote the type's visibility to internal. | ||
    } | ||
} |
30 changes: 30 additions & 0 deletions
30
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranscription.Serialization.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System.Text.Json; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
public partial class AudioTranscription | ||
{ | ||
internal static AudioTranscription FromResponse(Response response) | ||
{ | ||
if (response.Headers.ContentType.Contains("text/plain")) | ||
{ | ||
return new AudioTranscription( | ||
text: response.Content.ToString(), | ||
internalAudioTaskLabel: null, | ||
language: null, | ||
duration: default, | ||
segments: null); | ||
} | ||
else | ||
{ | ||
using var document = JsonDocument.Parse(response.Content); | ||
return DeserializeAudioTranscription(document.RootElement); | ||
} | ||
} | ||
} | ||
} |
15 changes: 15 additions & 0 deletions
15
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranscription.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    public partial class AudioTranscription | ||
    { | ||
        // CUSTOM CODE NOTE: included to demote visibility of 'task'. The label is declared here as an | ||
        // internal, get-only property so that it is not part of the public surface of this class. | ||
|
||
        /// <summary> The label that describes which operation type generated the accompanying response data. </summary> | ||
        internal AudioTaskLabel? InternalAudioTaskLabel { get; } | ||
    } | ||
} |
32 changes: 32 additions & 0 deletions
32
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranscriptionFormat.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System; | ||
using Azure.Core; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    // Custom partial for AudioTranscriptionFormat. Each property carries a CodeGenMember attribute naming | ||
    // a member ("Json", "VerboseJson", "Text"); presumably this maps the generated member of that name onto | ||
    // the property declared here -- confirm against the code generator's documentation. | ||
    public readonly partial struct AudioTranscriptionFormat : IEquatable<AudioTranscriptionFormat> | ||
    { | ||
        /// <summary> | ||
        /// Specifies that a transcription response should provide plain, unannotated text with no additional metadata. | ||
        /// </summary> | ||
        [CodeGenMember("Json")] | ||
        public static AudioTranscriptionFormat Simple { get; } = new AudioTranscriptionFormat(SimpleValue); | ||
|
||
        /// <summary> | ||
        /// Specifies that a transcription response should provide plain, unannotated text with additional metadata | ||
        /// including timings, probability scores, and other processing details. | ||
        /// </summary> | ||
        [CodeGenMember("VerboseJson")] | ||
        public static AudioTranscriptionFormat Verbose { get; } = new AudioTranscriptionFormat(VerboseValue); | ||
|
||
        // (Note: text is hidden as its behavior is redundant with 'json' when using a shared, strongly-typed response | ||
        // value container) | ||
        /// <summary> Use a response body that is plain text containing the raw, unannotated transcription. </summary> | ||
        [CodeGenMember("Text")] | ||
        internal static AudioTranscriptionFormat InternalPlainText { get; } = new AudioTranscriptionFormat(InternalPlainTextValue); | ||
    } | ||
} |
37 changes: 37 additions & 0 deletions
37
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranscriptionOptions.Serialization.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System.Net.Http; | ||
using Azure.Core; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
public partial class AudioTranscriptionOptions | ||
{ | ||
internal virtual RequestContent ToRequestContent() | ||
{ | ||
var content = new MultipartFormDataRequestContent(); | ||
content.Add(new StringContent(InternalNonAzureModelName), "model"); | ||
content.Add(new ByteArrayContent(AudioData.ToArray()), "file", "@file.wav"); | ||
if (Optional.IsDefined(ResponseFormat)) | ||
{ | ||
content.Add(new StringContent(ResponseFormat.ToString()), "response_format"); | ||
} | ||
if (Optional.IsDefined(Prompt)) | ||
{ | ||
content.Add(new StringContent(Prompt), "prompt"); | ||
} | ||
if (Optional.IsDefined(Temperature)) | ||
{ | ||
content.Add(new StringContent($"{Temperature}"), "temperature"); | ||
} | ||
if (Optional.IsDefined(Language)) | ||
{ | ||
content.Add(new StringContent(Language), "language"); | ||
} | ||
return content; | ||
} | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranscriptionOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    public partial class AudioTranscriptionOptions | ||
    { | ||
        /// <summary> | ||
        /// The audio data to transcribe. This must be the binary content of a file in one of the supported media formats: | ||
        /// flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm. | ||
        /// <para> | ||
        /// To assign a byte[] to this property use <see cref="BinaryData.FromBytes(byte[])"/>. | ||
        /// The bytes are not Base64 encoded: <c>ToRequestContent</c> copies them unchanged into the | ||
        /// binary "file" part of the request content. | ||
        /// </para> | ||
        /// <para> | ||
        /// Example: | ||
        /// <list type="bullet"> | ||
        /// <item> | ||
        /// <term>BinaryData.FromBytes(File.ReadAllBytes(path))</term> | ||
        /// <description>Supplies the contents of the audio file at <c>path</c>.</description> | ||
        /// </item> | ||
        /// </list> | ||
        /// </para> | ||
        /// </summary> | ||
        public BinaryData AudioData { get; set; } | ||
|
||
        /// <summary> Initializes a new instance of AudioTranscriptionOptions. </summary> | ||
        public AudioTranscriptionOptions() | ||
        { } | ||
|
||
        // Sent as the "model" form field by ToRequestContent. | ||
        internal string InternalNonAzureModelName { get; set; } | ||
    } | ||
} |
30 changes: 30 additions & 0 deletions
30
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranslation.Serialization.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System.Text.Json; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
public partial class AudioTranslation | ||
{ | ||
internal static AudioTranslation FromResponse(Response response) | ||
{ | ||
if (response.Headers.ContentType.Contains("text/plain")) | ||
{ | ||
return new AudioTranslation( | ||
text: response.Content.ToString(), | ||
internalAudioTaskLabel: null, | ||
language: null, | ||
duration: default, | ||
segments: null); | ||
} | ||
else | ||
{ | ||
using var document = JsonDocument.Parse(response.Content); | ||
return DeserializeAudioTranslation(document.RootElement); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    /// <summary> Result information for an operation that translated spoken audio into written text. </summary> | ||
    public partial class AudioTranslation | ||
    { | ||
        // CUSTOM CODE NOTE: the task label is declared here as an internal, get-only property so that it | ||
        // is not part of the public surface of this class. | ||
        /// <summary> The label that describes which operation type generated the accompanying response data. </summary> | ||
        internal AudioTaskLabel? InternalAudioTaskLabel { get; } | ||
    } | ||
} |
32 changes: 32 additions & 0 deletions
32
sdk/openai/Azure.AI.OpenAI/src/Custom/AudioTranslationFormat.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
#nullable disable | ||
|
||
using System; | ||
using Azure.Core; | ||
|
||
namespace Azure.AI.OpenAI | ||
{ | ||
    // Custom partial for AudioTranslationFormat. Each property carries a CodeGenMember attribute naming | ||
    // a member ("Json", "VerboseJson", "Text"); presumably this maps the generated member of that name onto | ||
    // the property declared here -- confirm against the code generator's documentation. | ||
    public readonly partial struct AudioTranslationFormat : IEquatable<AudioTranslationFormat> | ||
    { | ||
        /// <summary> | ||
        /// Specifies that a translation response should provide plain, unannotated text with no additional metadata. | ||
        /// </summary> | ||
        [CodeGenMember("Json")] | ||
        public static AudioTranslationFormat Simple { get; } = new AudioTranslationFormat(SimpleValue); | ||
|
||
        /// <summary> | ||
        /// Specifies that a translation response should provide plain, unannotated text with additional metadata | ||
        /// including timings, probability scores, and other processing details. | ||
        /// </summary> | ||
        [CodeGenMember("VerboseJson")] | ||
        public static AudioTranslationFormat Verbose { get; } = new AudioTranslationFormat(VerboseValue); | ||
|
||
        // (Note: text is hidden as its behavior is redundant with 'json' when using a shared, strongly-typed response | ||
        // value container) | ||
        /// <summary> Use a response body that is plain text containing the raw, unannotated translation. </summary> | ||
        [CodeGenMember("Text")] | ||
        internal static AudioTranslationFormat InternalPlainText { get; } = new AudioTranslationFormat(InternalPlainTextValue); | ||
    } | ||
} |
Oops, something went wrong.