From c84590450d307fe80467e571cc1568c5d8ed078c Mon Sep 17 00:00:00 2001
From: Vitor Hideyoshi <vitor.h.n.batista@gmail.com>
Date: Thu, 15 Feb 2024 16:48:48 -0300
Subject: [PATCH 1/7] Adds Parameter use_enhanced and model to
 GoogleCloudSpeech

Adds the parameters use_enhanced and model to the recognize_google_cloud method for more customizable options for the user and better results in specific cases
---
 speech_recognition/__init__.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 852eaeef..b2b365be 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -670,7 +670,7 @@ def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, g
         if hypothesis is not None: return hypothesis.hypstr
         raise UnknownValueError()  # no transcriptions available
 
-    def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, show_all=False):
+    def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, use_enhanced=False, model=None, show_all=False):
         """
         Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API.
 
@@ -689,6 +689,8 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
             assert os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is not None
         assert isinstance(language, str), "``language`` must be a string"
         assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings"
+        assert isinstance(use_enhanced, bool), "``use_enhanced`` must be a boolean"
+        assert model is None or model in (None, "latest_long", "latest_short", "command_and_search", "phone_call", "video", "default", "medical_conversation", "medical_dictation"), "``model`` must be None or 'command_and_search', 'phone_call', 'video', or 'default'"
 
         try:
             import socket
@@ -712,7 +714,9 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
         config = {
             'encoding': speech.RecognitionConfig.AudioEncoding.FLAC,
             'sample_rate_hertz': audio_data.sample_rate,
-            'language_code': language
+            'language_code': language,
+            'use_enhanced': use_enhanced,
+            'model': model,
         }
         if preferred_phrases is not None:
             config['speechContexts'] = [speech.SpeechContext(

From 8e0fa407d99b92330d837d45774d01b50af5083b Mon Sep 17 00:00:00 2001
From: Vitor Hideyoshi <vitor.h.n.batista@gmail.com>
Date: Fri, 26 Apr 2024 19:12:36 +0000
Subject: [PATCH 2/7] Adds Parameters use_enhanced and model to GoogleSpeechAPI
 docstring

---
 speech_recognition/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index b2b365be..2d60f7af 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -680,6 +680,10 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
 
         If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.
 
+        The ``use_enhanced`` is a boolean option that sets a flag with the same name on the Google Cloud Speech API, it will make the API uses the enhanced version of the model. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/enhanced-models>` __.
+        
+        Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
+
         Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.
 
         Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the credentials aren't valid, or if there is no Internet connection.

From daca0004ee7fe87da9881d3772b66d5d689090b9 Mon Sep 17 00:00:00 2001
From: Vitor Hideyoshi <vitor.h.n.batista@gmail.com>
Date: Fri, 26 Apr 2024 19:27:41 +0000
Subject: [PATCH 3/7] Adds Missing Models to Docstring and Adds Missing
 Parameters to Library Reference File

---
 reference/library-reference.rst | 4 ++++
 speech_recognition/__init__.py  | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/reference/library-reference.rst b/reference/library-reference.rst
index 0aa7a8ce..5268e722 100644
--- a/reference/library-reference.rst
+++ b/reference/library-reference.rst
@@ -238,6 +238,10 @@ The recognition language is determined by ``language``, which is a BCP-47 langua
 
 If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.
 
+The ``use_enhanced`` is a boolean option that sets a flag with the same name on the Google Cloud Speech API, it will make the API uses the enhanced version of the model. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/enhanced-models>` __.
+
+Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
+
 Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.
 
 Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the credentials aren't valid, or if there is no Internet connection.
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 2d60f7af..6d82babb 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -682,7 +682,7 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
 
         The ``use_enhanced`` is a boolean option that sets a flag with the same name on the Google Cloud Speech API, it will make the API uses the enhanced version of the model. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/enhanced-models>` __.
         
-        Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
+        Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
 
         Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.
 
@@ -694,7 +694,7 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
         assert isinstance(language, str), "``language`` must be a string"
         assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings"
         assert isinstance(use_enhanced, bool), "``use_enhanced`` must be a boolean"
-        assert model is None or model in (None, "latest_long", "latest_short", "command_and_search", "phone_call", "video", "default", "medical_conversation", "medical_dictation"), "``model`` must be None or 'command_and_search', 'phone_call', 'video', or 'default'"
+        assert model is None or model in (None, "latest_long", "latest_short", "command_and_search", "phone_call", "video", "default", "medical_conversation", "medical_dictation"), "``model`` must be None or 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', or 'default'"
 
         try:
             import socket

From abb35fe380c522d135ca7fb345f73ec5a9064490 Mon Sep 17 00:00:00 2001
From: Vitor Hideyoshi <vitor.h.n.batista@gmail.com>
Date: Fri, 26 Apr 2024 19:37:09 +0000
Subject: [PATCH 4/7] Fixes Broken Formatting

---
 speech_recognition/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 6d82babb..02fa14dc 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -681,7 +681,7 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
         If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.
 
         The ``use_enhanced`` is a boolean option that sets a flag with the same name on the Google Cloud Speech API, it will make the API uses the enhanced version of the model. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/enhanced-models>` __.
-        
+
         Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
 
         Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.

From 4be80262468d33d78dc79a9d8f8e89b44a817be9 Mon Sep 17 00:00:00 2001
From: Vitor Hideyoshi <vitor.h.n.batista@gmail.com>
Date: Wed, 27 Nov 2024 21:27:30 -0300
Subject: [PATCH 5/7] Better Implementation of API Params Configuration

This implementation is needed for the configuration of Cloud Speech API-specific parameters. This implementation only validates and creates assertions for the two most used params: use_enhanced and model.
---
 reference/library-reference.rst |  8 +++++---
 speech_recognition/__init__.py  | 25 ++++++++++++++++++-------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/reference/library-reference.rst b/reference/library-reference.rst
index 5268e722..87858464 100644
--- a/reference/library-reference.rst
+++ b/reference/library-reference.rst
@@ -227,7 +227,7 @@ Returns the most likely transcription if ``show_all`` is false (the default). Ot
 
 Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
 
-``recognizer_instance.recognize_google_cloud(audio_data: AudioData, credentials_json: Union[str, None] = None, language: str = "en-US", preferred_phrases: Union[Iterable[str], None] = None, show_all: bool = False) -> Union[str, Dict[str, Any]]``
+``recognizer_instance.recognize_google_cloud(audio_data: AudioData, credentials_json: Union[str, None] = None, language: str = "en-US", preferred_phrases: Union[Iterable[str], None] = None, show_all: bool = False, **api_params) -> Union[str, Dict[str, Any]]``
 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
 Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API.
@@ -238,9 +238,11 @@ The recognition language is determined by ``language``, which is a BCP-47 langua
 
 If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.
 
-The ``use_enhanced`` is a boolean option that sets a flag with the same name on the Google Cloud Speech API, it will make the API uses the enhanced version of the model. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/enhanced-models>` __.
+``api_params`` are optional Cloud Speech API-specific parameters, passed as keyword arguments and forwarded to the recognition config. For more information, see the `RecognitionConfig reference <https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v1.types.RecognitionConfig>`__.
 
-Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
+``use_enhanced`` is a boolean option. If ``use_enhanced`` is set to true and the ``model`` field is not set, then an appropriate enhanced model is chosen if an enhanced model exists for the audio. If ``use_enhanced`` is true and an enhanced version of the specified model does not exist, then the speech is recognized using the standard version of the specified model.
+
+Furthermore, if the option ``use_enhanced`` has not been set, the option ``model`` can be used to select the model best suited to your domain to get the best results. If a model is not explicitly specified, then a model is auto-selected based on the other parameters of this method.
 
 Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.
 
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 02fa14dc..e0002c3c 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -670,7 +670,7 @@ def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, g
         if hypothesis is not None: return hypothesis.hypstr
         raise UnknownValueError()  # no transcriptions available
 
-    def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, use_enhanced=False, model=None, show_all=False):
+    def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, show_all=False, **api_params):
         """
         Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API.
 
@@ -680,9 +680,16 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
 
         If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.
 
-        The ``use_enhanced`` is a boolean option that sets a flag with the same name on the Google Cloud Speech API, it will make the API uses the enhanced version of the model. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/enhanced-models>` __.
+        ``api_params`` are optional Cloud Speech API-specific parameters, passed as keyword arguments and forwarded to the recognition config. For more information, see the `RecognitionConfig reference <https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v1.types.RecognitionConfig>`__.
 
-        Furthermore, you can use the option ``model`` to set your desired model, the Python Google Speech API makes available the following options: 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', 'default', 'medical_conversation', 'medical_dictation'. More information can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech-to-text/docs/transcription-model>` __.
+            The ``use_enhanced`` is a boolean option. If use_enhanced is set to true and the model field is not set,
+            then an appropriate enhanced model is chosen if an enhanced model exists for the audio.
+            If use_enhanced is true and an enhanced version of the specified model does not exist,
+            then the speech is recognized using the standard version of the specified model.
+
+            Furthermore, if the option ``use_enhanced`` has not been set, the option ``model`` can be used to select the model best
+            suited to your domain to get the best results. If a model is not explicitly specified,
+            then a model is auto-selected based on the other parameters of this method.
 
         Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.
 
@@ -693,8 +700,13 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
             assert os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is not None
         assert isinstance(language, str), "``language`` must be a string"
         assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings"
-        assert isinstance(use_enhanced, bool), "``use_enhanced`` must be a boolean"
-        assert model is None or model in (None, "latest_long", "latest_short", "command_and_search", "phone_call", "video", "default", "medical_conversation", "medical_dictation"), "``model`` must be None or 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', or 'default'"
+
+        # Implementation of assertions of common api_params
+        if "use_enhanced" in api_params:
+            assert isinstance(api_params["use_enhanced"], bool), "``use_enhanced`` must be a boolean when used"
+
+        if "model" in api_params:
+            assert api_params["model"] in (None, "latest_long", "latest_short", "command_and_search", "phone_call", "video", "default", "medical_conversation", "medical_dictation"), "``model`` must be None or 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', or 'default'"
 
         try:
             import socket
@@ -719,8 +731,7 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
             'encoding': speech.RecognitionConfig.AudioEncoding.FLAC,
             'sample_rate_hertz': audio_data.sample_rate,
             'language_code': language,
-            'use_enhanced': use_enhanced,
-            'model': model,
+            **api_params,
         }
         if preferred_phrases is not None:
             config['speechContexts'] = [speech.SpeechContext(

From db0da15888c75c28707ce76840b42426922bb0d2 Mon Sep 17 00:00:00 2001
From: Vitor Hideyoshi <vitor.h.n.batista@gmail.com>
Date: Wed, 18 Dec 2024 02:34:45 +0000
Subject: [PATCH 6/7] Removes Anti Pattern of Assertions for Data Validation

---
 speech_recognition/__init__.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index dbfd2a7b..d05a6285 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -724,13 +724,6 @@ def recognize_google_cloud(self, audio_data, credentials_json=None, language="en
         assert isinstance(language, str), "``language`` must be a string"
         assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings"
 
-        # Implementation of assertions of common api_params
-        if "use_enhanced" in api_params:
-            assert isinstance(api_params["use_enhanced"], bool), "``use_enhanced`` must be a boolean when used"
-
-        if "model" in api_params:
-            assert api_params["model"] in (None, "latest_long", "latest_short", "command_and_search", "phone_call", "video", "default", "medical_conversation", "medical_dictation"), "``model`` must be None or 'latest_long', 'latest_short', 'command_and_search', 'phone_call', 'video', or 'default'"
-
         try:
             import socket
 

From 8abafcb0ff0253e7d9973ccb544e22414f14f774 Mon Sep 17 00:00:00 2001
From: nikkie <takuyafjp+develop@gmail.com>
Date: Wed, 18 Dec 2024 23:41:43 +0900
Subject: [PATCH 7/7] style: Fix rstcheck's "Title underline too short."

---
 reference/library-reference.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/reference/library-reference.rst b/reference/library-reference.rst
index 7eef4108..296fc250 100644
--- a/reference/library-reference.rst
+++ b/reference/library-reference.rst
@@ -228,7 +228,7 @@ Returns the most likely transcription if ``show_all`` is false (the default). Ot
 Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
 
 ``recognizer_instance.recognize_google_cloud(audio_data: AudioData, credentials_json: Union[str, None] = None, language: str = "en-US", preferred_phrases: Union[Iterable[str], None] = None, show_all: bool = False, **api_params) -> Union[str, Dict[str, Any]]``
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
 Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API.