From d7d3f128ccc5e19198e15cdba2cb922d98c88072 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 16:15:11 +0200
Subject: [PATCH 01/13] Added vosk

---
 speech_recognition/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 8eaabf94..a015e7d1 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1390,7 +1390,8 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
             for node_id in top_k:
                 human_string = self.tflabels[node_id]
                 return human_string
-
+    def recognize_vosk(self, audio_data, language='en'):
+        from vosk import Model, KaldiRecognizer
 
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""

From f0eb1ba3573b35b24c113951c654a6fc7d75ef38 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 16:16:54 +0200
Subject: [PATCH 02/13] Added 2 more recognition services

---
 README.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.rst b/README.rst
index 0d0322b9..4434dc21 100644
--- a/README.rst
+++ b/README.rst
@@ -34,6 +34,8 @@ Speech recognition engine/API support:
 * `Houndify API <https://houndify.com/>`__
 * `IBM Speech to Text <http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/speech-to-text.html>`__
 * `Snowboy Hotword Detection <https://snowboy.kitt.ai/>`__ (works offline)
+* `TensorFlow <https://www.tensorflow.org/>`__
+* `Vosk API <https://github.com/alphacep/vosk-api/>`__ (works offline)
 
 **Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
 

From af9373883cf7a02f11129e458d8a86dc8364f6b4 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 18:45:27 +0200
Subject: [PATCH 03/13] Update of outdated README

---
 README.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.rst b/README.rst
index 4434dc21..b9ceb8c7 100644
--- a/README.rst
+++ b/README.rst
@@ -88,6 +88,7 @@ To use all of the functionality of the library, you should have:
 * **PocketSphinx** (required only if you need to use the Sphinx recognizer, ``recognizer_instance.recognize_sphinx``)
 * **Google API Client Library for Python** (required only if you need to use the Google Cloud Speech API, ``recognizer_instance.recognize_google_cloud``)
 * **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
+* **Vosk** (required only if you need to use Vosk API speech recognition, ``recognizer_instance.recognize_vosk``)
 
 The following requirements are optional, but can improve or extend functionality in some situations:
 

From f726da68c5015efe3548b59743306590167e47c3 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 19:36:34 +0200
Subject: [PATCH 04/13] Added Vosk API

You can now simply recognize with:

recognize_vosk()
---
 speech_recognition/__init__.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index a015e7d1..f3664054 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1390,9 +1390,39 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
             for node_id in top_k:
                 human_string = self.tflabels[node_id]
                 return human_string
+            
     def recognize_vosk(self, audio_data, language='en'):
         from vosk import Model, KaldiRecognizer
 
+        if not os.path.exists("model"):
+            return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
+            exit (1)
+
+        import pyaudio
+
+        model = Model("model")
+        rec = KaldiRecognizer(model, 16000)
+
+        p = pyaudio.PyAudio()
+        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
+        stream.start_stream()
+
+        while True:
+            data = stream.read(4000)
+            if len(data) == 0:
+                break
+            if rec.AcceptWaveform(data):
+                #bottom lines are for debugging
+                #print(rec.Result())
+                break
+            else:
+                #bottom lines are for debugging
+                #print(rec.PartialResult())
+                break
+
+        finalRecognition = rec.FinalResult()
+        return finalRecognition
+
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
     flac_converter = shutil_which("flac")  # check for installed version first

From 44d17b1886333f7d033c190dcd0eb8b7e492d898 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 19:52:35 +0200
Subject: [PATCH 05/13] Update of 1st review

---
 speech_recognition/__init__.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index f3664054..55a8ee68 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -25,6 +25,9 @@
 __version__ = "3.8.1"
 __license__ = "BSD"
 
+# model for Vosk
+modelVosk = Model("model")
+
 try:  # attempt to use the Python 2 modules
     from urllib import urlencode
     from urllib2 import Request, urlopen, URLError, HTTPError
@@ -1400,8 +1403,7 @@ def recognize_vosk(self, audio_data, language='en'):
 
         import pyaudio
 
-        model = Model("model")
-        rec = KaldiRecognizer(model, 16000)
+        rec = KaldiRecognizer(modelVosk, 16000)
 
         p = pyaudio.PyAudio()
         stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)

From 555cbaf854e47b6d9455a359754512047fe2d2fa Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:24:03 +0200
Subject: [PATCH 06/13] Update of vosk with help of @nshmyrev

Part 1
---
 speech_recognition/__init__.py | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 55a8ee68..ce2fc0da 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1400,29 +1400,13 @@ def recognize_vosk(self, audio_data, language='en'):
         if not os.path.exists("model"):
             return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
             exit (1)
-
+        
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
         import pyaudio
-
-        rec = KaldiRecognizer(modelVosk, 16000)
-
-        p = pyaudio.PyAudio()
-        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
-        stream.start_stream()
-
-        while True:
-            data = stream.read(4000)
-            if len(data) == 0:
-                break
-            if rec.AcceptWaveform(data):
-                #bottom lines are for debugging
-                #print(rec.Result())
-                break
-            else:
-                #bottom lines are for debugging
-                #print(rec.PartialResult())
-                break
-
+        
+        rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2));
         finalRecognition = rec.FinalResult()
+        
         return finalRecognition
 
 def get_flac_converter():

From 696ddb286f2526b29d175c1a95c9dd76c75ccbda Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:26:05 +0200
Subject: [PATCH 07/13] Update of vosk with help of @nshmyrev

Part 2
---
 speech_recognition/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index ce2fc0da..00a34e8e 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1404,6 +1404,11 @@ def recognize_vosk(self, audio_data, language='en'):
         assert isinstance(audio_data, AudioData), "Data must be audio data"
         import pyaudio
         
+        if not hasattr(self, 'vosk_model'):
+            self.vosk_model = Model()
+
+        rec = KaldiRecognizer(self.vosk_model, 16000);
+        
         rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2));
         finalRecognition = rec.FinalResult()
         

From a8f270788053fa38ca58d1ac7d1ec36608169bab Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:27:01 +0200
Subject: [PATCH 08/13] Update of vosk with help of @nshmyrev

Part 3
---
 speech_recognition/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 00a34e8e..17f4e5bb 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1402,7 +1402,6 @@ def recognize_vosk(self, audio_data, language='en'):
             exit (1)
         
         assert isinstance(audio_data, AudioData), "Data must be audio data"
-        import pyaudio
         
         if not hasattr(self, 'vosk_model'):
             self.vosk_model = Model()

From a4c29cbf45eb3a225a91d0aa2062241e5fff4530 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:28:13 +0200
Subject: [PATCH 09/13] Update of vosk with help of @nshmyrev

Part 4
---
 speech_recognition/__init__.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 17f4e5bb..77f5d385 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -25,9 +25,6 @@
 __version__ = "3.8.1"
 __license__ = "BSD"
 
-# model for Vosk
-modelVosk = Model("model")
-
 try:  # attempt to use the Python 2 modules
     from urllib import urlencode
     from urllib2 import Request, urlopen, URLError, HTTPError

From ce3023f4c17aed92e8ffc957b6ba16a16335cb6c Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:37:09 +0200
Subject: [PATCH 10/13] Update of vosk with help of @nshmyrev

Last part - Updating readme
---
 README.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.rst b/README.rst
index b9ceb8c7..4b224960 100644
--- a/README.rst
+++ b/README.rst
@@ -54,6 +54,8 @@ The `library reference <https://github.com/Uberi/speech_recognition/blob/master/
 
 See `Notes on using PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``.
 
+To use Vosk, you have to install a Vosk model. The available models are listed `here <https://alphacephei.com/vosk/models>`__. Unpack the model you choose into a folder named ``model`` in the directory you run your program from; that is where ``recognizer_instance.recognize_vosk`` looks for it.
+
 Examples
 --------
 
@@ -132,6 +134,16 @@ Note that the versions available in most package repositories are outdated and w
 
 See `Notes on using PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``.
 
+Vosk (for Vosk users)
+~~~~~~~~~~~~~~~~~~~~~
+Vosk API is **required if and only if you want to use Vosk recognizer** (``recognizer_instance.recognize_vosk``).
+
+You can install it with ``python3 -m pip install vosk``.
+
+You also have to install Vosk Models:
+
+The models available for download are listed `here <https://alphacephei.com/vosk/models>`__. Unpack the model you choose into a folder named ``model`` in the directory you run your program from; that is where ``recognizer_instance.recognize_vosk`` looks for it.
+
 Google Cloud Speech Library for Python (for Google Cloud Speech API users)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

From dfad80ef4563002d96a9f85bee34bea81a893ad9 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:40:28 +0200
Subject: [PATCH 11/13] Update of vosk with help of @nshmyrev

Hoping that it's the last part
---
 speech_recognition/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 77f5d385..63ea2a39 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1393,14 +1393,14 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
             
     def recognize_vosk(self, audio_data, language='en'):
         from vosk import Model, KaldiRecognizer
-
-        if not os.path.exists("model"):
-            return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
-            exit (1)
         
         assert isinstance(audio_data, AudioData), "Data must be audio data"
+        assert isinstance(language, AudioData), "Language data"
         
         if not hasattr(self, 'vosk_model'):
+            if not os.path.exists("model"):
+                return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
+                exit (1)
             self.vosk_model = Model()
 
         rec = KaldiRecognizer(self.vosk_model, 16000);

From a1a7a14a88bdab3fd7b49fc1b92ca887570369f0 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:41:07 +0200
Subject: [PATCH 12/13] Update of vosk with help of @nshmyrev

Fix, because i screwed up
---
 speech_recognition/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 63ea2a39..095d8e89 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1395,7 +1395,7 @@ def recognize_vosk(self, audio_data, language='en'):
         from vosk import Model, KaldiRecognizer
         
         assert isinstance(audio_data, AudioData), "Data must be audio data"
-        assert isinstance(language, AudioData), "Language data"
+        assert isinstance(language, str), "Language data"
         
         if not hasattr(self, 'vosk_model'):
             if not os.path.exists("model"):

From 274f5eb05bb56ffa591c402b19b6c230af9c5dff Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Sat, 19 Sep 2020 10:33:59 +0200
Subject: [PATCH 13/13] Final update with help of @nshmyrev

Finally final update
---
 speech_recognition/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 095d8e89..37b17292 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1395,13 +1395,12 @@ def recognize_vosk(self, audio_data, language='en'):
         from vosk import Model, KaldiRecognizer
         
         assert isinstance(audio_data, AudioData), "Data must be audio data"
-        assert isinstance(language, str), "Language data"
         
         if not hasattr(self, 'vosk_model'):
             if not os.path.exists("model"):
                 return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
                 exit (1)
-            self.vosk_model = Model()
+            self.vosk_model = Model("model")
 
         rec = KaldiRecognizer(self.vosk_model, 16000);