From e7b54b14ff18f837d94d9d488b8d244201f78c60 Mon Sep 17 00:00:00 2001
From: Yulin Li
Date: Sun, 14 Apr 2024 13:30:35 +0800
Subject: [PATCH] support g722-16khz-64kbps audio output format

Add two speech synthesis output formats, "amr-wb-16000hz" and
"g722-16khz-64kbps":

- SpeechSynthesisOutputFormat gains the AmrWb16000Hz and G72216Khz64Kbps
  members.
- AudioFormatTag gains AMR_WB and G722, each with a MIME type entry in
  SpeakerAudioDestination.
- AudioOutputFormatImpl maps the new enum members to their format strings
  and builds a format object for each string in its format-string switch.

The switch case for AMR-WB must be spelled exactly like the mapped string
("amr-wb-16000hz"); any other spelling never matches and the request falls
through to the default riff-16khz-16bit-mono-pcm branch.
---
NOTE(review): hunk indentation was reconstructed assuming 4-space indents;
verify against the target tree before applying. The "index" post-image
hashes are carried over unchanged and are informational only.

 src/sdk/Audio/AudioOutputFormat.ts       | 24 ++++++++++++++++++++++++
 src/sdk/Audio/AudioStreamFormat.ts       |  2 ++
 src/sdk/Audio/SpeakerAudioDestination.ts |  2 ++
 src/sdk/SpeechSynthesisOutputFormat.ts   | 18 +++++++++++++++++-
 4 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/sdk/Audio/AudioOutputFormat.ts b/src/sdk/Audio/AudioOutputFormat.ts
index 831b977d..99d5b278 100644
--- a/src/sdk/Audio/AudioOutputFormat.ts
+++ b/src/sdk/Audio/AudioOutputFormat.ts
@@ -50,6 +50,8 @@ export class AudioOutputFormatImpl extends AudioStreamFormatImpl {
         [SpeechSynthesisOutputFormat.Riff22050Hz16BitMonoPcm]: "riff-22050hz-16bit-mono-pcm",
         [SpeechSynthesisOutputFormat.Raw44100Hz16BitMonoPcm]: "raw-44100hz-16bit-mono-pcm",
         [SpeechSynthesisOutputFormat.Riff44100Hz16BitMonoPcm]: "riff-44100hz-16bit-mono-pcm",
+        [SpeechSynthesisOutputFormat.AmrWb16000Hz]: "amr-wb-16000hz",
+        [SpeechSynthesisOutputFormat.G72216Khz64Kbps]: "g722-16khz-64kbps",
     };
     private priAudioFormatString: string;
     /**
@@ -519,6 +521,28 @@ export class AudioOutputFormatImpl extends AudioStreamFormatImpl {
                     speechSynthesisOutputFormatString,
                     "raw-44100hz-16bit-mono-pcm",
                     true);
+            case "amr-wb-16000hz":
+                return new AudioOutputFormatImpl(
+                    AudioFormatTag.AMR_WB,
+                    1,
+                    16000,
+                    3052,
+                    2,
+                    16,
+                    speechSynthesisOutputFormatString,
+                    speechSynthesisOutputFormatString,
+                    false);
+            case "g722-16khz-64kbps":
+                return new AudioOutputFormatImpl(
+                    AudioFormatTag.G722,
+                    1,
+                    16000,
+                    8000,
+                    2,
+                    16,
+                    speechSynthesisOutputFormatString,
+                    speechSynthesisOutputFormatString,
+                    false);
             case "riff-16khz-16bit-mono-pcm":
             default:
                 return new AudioOutputFormatImpl(
diff --git a/src/sdk/Audio/AudioStreamFormat.ts b/src/sdk/Audio/AudioStreamFormat.ts
index be9d5ac5..8730a858 100644
--- a/src/sdk/Audio/AudioStreamFormat.ts
+++ b/src/sdk/Audio/AudioStreamFormat.ts
@@ -13,6 +13,8 @@ export enum AudioFormatTag {
     ALaw,
     FLAC,
     OPUS,
+    AMR_WB,
+    G722,
 }
 
 /**
diff --git a/src/sdk/Audio/SpeakerAudioDestination.ts b/src/sdk/Audio/SpeakerAudioDestination.ts
index 40c53f79..08465658 100644
--- a/src/sdk/Audio/SpeakerAudioDestination.ts
+++ b/src/sdk/Audio/SpeakerAudioDestination.ts
@@ -23,6 +23,8 @@ const AudioFormatToMimeType: INumberDictionary = {
     [AudioFormatTag.WEBM_OPUS]: "audio/webm; codecs=opus",
     [AudioFormatTag.ALaw]: "audio/x-wav",
     [AudioFormatTag.FLAC]: "audio/flac",
+    [AudioFormatTag.AMR_WB]: "audio/amr-wb",
+    [AudioFormatTag.G722]: "audio/G722",
 };
 
 /**
diff --git a/src/sdk/SpeechSynthesisOutputFormat.ts b/src/sdk/SpeechSynthesisOutputFormat.ts
index d19c6efc..ecfbe4b5 100644
--- a/src/sdk/SpeechSynthesisOutputFormat.ts
+++ b/src/sdk/SpeechSynthesisOutputFormat.ts
@@ -251,5 +251,21 @@ export enum SpeechSynthesisOutputFormat {
      * Added in version 1.22.0
      * @member SpeechSynthesisOutputFormat.Riff44100Hz16BitMonoPcm
      */
-    Riff44100Hz16BitMonoPcm
+    Riff44100Hz16BitMonoPcm,
+
+    /**
+     * amr-wb-16000hz
+     * AMR-WB audio at 16kHz sampling rate.
+     * Added in version 1.38.0
+     * @member SpeechSynthesisOutputFormat.AmrWb16000Hz
+     */
+    AmrWb16000Hz,
+
+    /**
+     * g722-16khz-64kbps
+     * G.722 audio at 16kHz sampling rate and 64kbps bitrate.
+     * Added in version 1.38.0
+     * @member SpeechSynthesisOutputFormat.G72216Khz64Kbps
+     */
+    G72216Khz64Kbps
 }