diff --git a/reference/library-reference.rst b/reference/library-reference.rst index e8b6c7e0..296fc250 100644 --- a/reference/library-reference.rst +++ b/reference/library-reference.rst @@ -227,8 +227,8 @@ Returns the most likely transcription if ``show_all`` is false (the default). Ot Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. -``recognizer_instance.recognize_google_cloud(audio_data: AudioData, credentials_json: Union[str, None] = None, language: str = "en-US", preferred_phrases: Union[Iterable[str], None] = None, show_all: bool = False) -> Union[str, Dict[str, Any]]`` ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +``recognizer_instance.recognize_google_cloud(audio_data: AudioData, credentials_json: Union[str, None] = None, language: str = "en-US", preferred_phrases: Union[Iterable[str], None] = None, show_all: bool = False, **api_params) -> Union[str, Dict[str, Any]]`` +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API. @@ -238,6 +238,12 @@ The recognition language is determined by ``language``, which is a BCP-47 langua If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. 
This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings `__. +``api_params`` are optional Cloud Speech API-specific parameters, passed as additional keyword arguments and merged into the recognition config. For more information see the ``RecognitionConfig`` reference in the Google Cloud Speech documentation. + +``use_enhanced`` is a boolean option. If ``use_enhanced`` is set to true and ``model`` is not set, then an appropriate enhanced model is chosen if an enhanced model exists for the audio. If ``use_enhanced`` is true and an enhanced version of the specified model does not exist, then the speech is recognized using the standard version of the specified model. + +Furthermore, if ``use_enhanced`` has not been set, the ``model`` option can be used to select the model best suited to your domain to get the best results. If ``model`` is not explicitly specified, then a model is auto-selected by the Cloud Speech API based on the other recognition parameters. + Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary. Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the credentials aren't valid, or if there is no Internet connection. 
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index 253ab0fe..d05a6285 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -693,7 +693,7 @@ def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, g if hypothesis is not None: return hypothesis.hypstr raise UnknownValueError() # no transcriptions available - def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, show_all=False): + def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, show_all=False, **api_params): """ Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API. @@ -703,6 +703,17 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings `__. + ``api_params`` are optional Cloud Speech API-specific parameters, passed as additional keyword arguments and merged into the recognition config. For more information see the ``RecognitionConfig`` reference in the Google Cloud Speech documentation. + + ``use_enhanced`` is a boolean option. If ``use_enhanced`` is set to true and ``model`` is not set, + then an appropriate enhanced model is chosen if an enhanced model exists for the audio. + If ``use_enhanced`` is true and an enhanced version of the specified model does not exist, + then the speech is recognized using the standard version of the specified model. + + Furthermore, if ``use_enhanced`` has not been set, the ``model`` option can be used to select the model best + suited to your domain to get the best results. 
If ``model`` is not explicitly specified, + then a model is auto-selected by the Cloud Speech API based on the other recognition parameters. + Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary. Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the credentials aren't valid, or if there is no Internet connection. @@ -735,7 +746,8 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en config = { 'encoding': speech.RecognitionConfig.AudioEncoding.FLAC, 'sample_rate_hertz': audio_data.sample_rate, - 'language_code': language + 'language_code': language, + **api_params, } if preferred_phrases is not None: config['speechContexts'] = [speech.SpeechContext(