From bbf6753a01da27049789fa2f000b2e9885f54992 Mon Sep 17 00:00:00 2001
From: ftnext
Date: Sat, 7 Dec 2024 13:10:45 +0900
Subject: [PATCH] [feat] Partially support OpenAI's optional parameters

---
Review notes (this section is not part of the commit message):
- OpenAIOptionalParameters now derives from TypedDict with total=False.
  PEP 692 only allows Unpack[...] on **kwargs when the operand is a
  TypedDict, and total=False keeps every listed key optional.
- TypedDict is imported on the typing_extensions line this patch already
  adds, so hunk sizes and the diffstat below are unchanged.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy; confirm the context lines against the target file before applying.
  The post-image blob id on the index line predates the TypedDict change.

 speech_recognition/recognizers/whisper.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/speech_recognition/recognizers/whisper.py b/speech_recognition/recognizers/whisper.py
index d4d63923..0ab22dc6 100644
--- a/speech_recognition/recognizers/whisper.py
+++ b/speech_recognition/recognizers/whisper.py
@@ -3,6 +3,8 @@
 import os
 from typing import Literal
 
+from typing_extensions import TypedDict, Unpack
+
 from speech_recognition.audio import AudioData
 from speech_recognition.exceptions import SetupError
 from speech_recognition.recognizers.whisper_api import (
@@ -13,12 +15,27 @@
 WhisperModel = Literal["whisper-1"]
 
 
+class OpenAIOptionalParameters(TypedDict, total=False):
+    """OpenAI speech transcription's optional parameters.
+
+    https://platform.openai.com/docs/api-reference/audio/createTranscription
+    """
+
+    language: str
+    prompt: str
+    # TODO Add support `Literal["text", "srt", "verbose_json", "vtt"]`
+    response_format: Literal["json"]
+    temperature: float
+    # timestamp_granularities  # TODO support
+
+
 def recognize_whisper_api(
     recognizer,
     audio_data: "AudioData",
     *,
     model: WhisperModel = "whisper-1",
     api_key: str | None = None,
+    **kwargs: Unpack[OpenAIOptionalParameters],
 ) -> str:
     """
     Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API.
@@ -40,4 +57,4 @@ def recognize_whisper_api(
         )
 
     recognizer = OpenAICompatibleRecognizer(openai.OpenAI(api_key=api_key))
-    return recognizer.recognize(audio_data, model)
+    return recognizer.recognize(audio_data, model, **kwargs)