From d7d3f128ccc5e19198e15cdba2cb922d98c88072 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 16:15:11 +0200
Subject: [PATCH 01/13] Added vosk
---
speech_recognition/__init__.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 8eaabf94..a015e7d1 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1390,7 +1390,8 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
for node_id in top_k:
human_string = self.tflabels[node_id]
return human_string
-
+ def recognize_vosk(self, audio_data, language='en'):
+ from vosk import Model, KaldiRecognizer
def get_flac_converter():
"""Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
From f0eb1ba3573b35b24c113951c654a6fc7d75ef38 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 16:16:54 +0200
Subject: [PATCH 02/13] Added 2 more recognition services
---
README.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/README.rst b/README.rst
index 0d0322b9..4434dc21 100644
--- a/README.rst
+++ b/README.rst
@@ -34,6 +34,8 @@ Speech recognition engine/API support:
* `Houndify API `__
* `IBM Speech to Text `__
* `Snowboy Hotword Detection `__ (works offline)
+* `Tensorflow `__
+* `Vosk API `__ (works offline)
**Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
From af9373883cf7a02f11129e458d8a86dc8364f6b4 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 18:45:27 +0200
Subject: [PATCH 03/13] Update of outdated README
---
README.rst | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.rst b/README.rst
index 4434dc21..b9ceb8c7 100644
--- a/README.rst
+++ b/README.rst
@@ -88,6 +88,7 @@ To use all of the functionality of the library, you should have:
* **PocketSphinx** (required only if you need to use the Sphinx recognizer, ``recognizer_instance.recognize_sphinx``)
* **Google API Client Library for Python** (required only if you need to use the Google Cloud Speech API, ``recognizer_instance.recognize_google_cloud``)
* **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
+* **Vosk** (required only if you need to use Vosk API speech recognition, ``recognizer_instance.recognize_vosk``)
The following requirements are optional, but can improve or extend functionality in some situations:
From f726da68c5015efe3548b59743306590167e47c3 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 19:36:34 +0200
Subject: [PATCH 04/13] Added Vosk API
You can now simply recognize with:
recognize_vosk()
---
speech_recognition/__init__.py | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index a015e7d1..f3664054 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1390,9 +1390,39 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
for node_id in top_k:
human_string = self.tflabels[node_id]
return human_string
+
def recognize_vosk(self, audio_data, language='en'):
from vosk import Model, KaldiRecognizer
+ if not os.path.exists("model"):
+ return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
+ exit (1)
+
+ import pyaudio
+
+ model = Model("model")
+ rec = KaldiRecognizer(model, 16000)
+
+ p = pyaudio.PyAudio()
+ stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
+ stream.start_stream()
+
+ while True:
+ data = stream.read(4000)
+ if len(data) == 0:
+ break
+ if rec.AcceptWaveform(data):
+ #bottom lines are for debugging
+ #print(rec.Result())
+ break
+ else:
+ #bottom lines are for debugging
+ #print(rec.PartialResult())
+ break
+
+ finalRecognition = rec.FinalResult()
+ return finalRecognition
+
def get_flac_converter():
"""Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
flac_converter = shutil_which("flac") # check for installed version first
From 44d17b1886333f7d033c190dcd0eb8b7e492d898 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 19:52:35 +0200
Subject: [PATCH 05/13] Update of 1st review
---
speech_recognition/__init__.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index f3664054..55a8ee68 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -25,6 +25,9 @@
__version__ = "3.8.1"
__license__ = "BSD"
+# model for Vosk
+modelVosk = Model("model")
+
try: # attempt to use the Python 2 modules
from urllib import urlencode
from urllib2 import Request, urlopen, URLError, HTTPError
@@ -1400,8 +1403,7 @@ def recognize_vosk(self, audio_data, language='en'):
import pyaudio
- model = Model("model")
- rec = KaldiRecognizer(model, 16000)
+ rec = KaldiRecognizer(modelVosk, 16000)
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
From 555cbaf854e47b6d9455a359754512047fe2d2fa Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:24:03 +0200
Subject: [PATCH 06/13] Update of vosk with help of @nshmyrev
Part 1
---
speech_recognition/__init__.py | 26 +++++---------------------
1 file changed, 5 insertions(+), 21 deletions(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 55a8ee68..ce2fc0da 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1400,29 +1400,13 @@ def recognize_vosk(self, audio_data, language='en'):
if not os.path.exists("model"):
return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
exit (1)
-
+
+ assert isinstance(audio_data, AudioData), "Data must be audio data"
import pyaudio
-
- rec = KaldiRecognizer(modelVosk, 16000)
-
- p = pyaudio.PyAudio()
- stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
- stream.start_stream()
-
- while True:
- data = stream.read(4000)
- if len(data) == 0:
- break
- if rec.AcceptWaveform(data):
- #bottom lines are for debugging
- #print(rec.Result())
- break
- else:
- #bottom lines are for debugging
- #print(rec.PartialResult())
- break
-
+
+ rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2));
finalRecognition = rec.FinalResult()
+
return finalRecognition
def get_flac_converter():
From 696ddb286f2526b29d175c1a95c9dd76c75ccbda Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:26:05 +0200
Subject: [PATCH 07/13] Update of vosk with help of @nshmyrev
Part 2
---
speech_recognition/__init__.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index ce2fc0da..00a34e8e 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1404,6 +1404,11 @@ def recognize_vosk(self, audio_data, language='en'):
assert isinstance(audio_data, AudioData), "Data must be audio data"
import pyaudio
+ if not hasattr(self, 'vosk_model'):
+ self.vosk_model = Model()
+
+ rec = KaldiRecognizer(self.vosk_model, 16000);
+
rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2));
finalRecognition = rec.FinalResult()
From a8f270788053fa38ca58d1ac7d1ec36608169bab Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:27:01 +0200
Subject: [PATCH 08/13] Update of vosk with help of @nshmyrev
Part 3
---
speech_recognition/__init__.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 00a34e8e..17f4e5bb 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1402,7 +1402,6 @@ def recognize_vosk(self, audio_data, language='en'):
exit (1)
assert isinstance(audio_data, AudioData), "Data must be audio data"
- import pyaudio
if not hasattr(self, 'vosk_model'):
self.vosk_model = Model()
From a4c29cbf45eb3a225a91d0aa2062241e5fff4530 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:28:13 +0200
Subject: [PATCH 09/13] Update of vosk with help of @nshmyrev
Part 4
---
speech_recognition/__init__.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 17f4e5bb..77f5d385 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -25,9 +25,6 @@
__version__ = "3.8.1"
__license__ = "BSD"
-# model for Vosk
-modelVosk = Model("model")
-
try: # attempt to use the Python 2 modules
from urllib import urlencode
from urllib2 import Request, urlopen, URLError, HTTPError
From ce3023f4c17aed92e8ffc957b6ba16a16335cb6c Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:37:09 +0200
Subject: [PATCH 10/13] Update of vosk with help of @nshmyrev
Last part - Updating readme
---
README.rst | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/README.rst b/README.rst
index b9ceb8c7..4b224960 100644
--- a/README.rst
+++ b/README.rst
@@ -54,6 +54,8 @@ The `library reference `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``.
+You have to install a Vosk model to use Vosk. `Here `__ are the models available for download. Unpack the chosen model as a folder named "model" in the folder you run your project from, like "your-project-folder/model".
+
Examples
--------
@@ -132,6 +134,16 @@ Note that the versions available in most package repositories are outdated and w
See `Notes on using PocketSphinx `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``.
+Vosk (for Vosk users)
+~~~~~~~~~~~~~~~~~~~~~
+Vosk API is **required if and only if you want to use the Vosk recognizer** (``recognizer_instance.recognize_vosk``).
+
+You can install it with ``python3 -m pip install vosk``.
+
+You also have to install Vosk Models:
+
+`Here `__ are the models available for download. Unpack the chosen model as a folder named "model" in the folder you run your project from, like "your-project-folder/model".
+
Google Cloud Speech Library for Python (for Google Cloud Speech API users)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
From dfad80ef4563002d96a9f85bee34bea81a893ad9 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:40:28 +0200
Subject: [PATCH 11/13] Update of vosk with help of @nshmyrev
Hoping that it's the last part
---
speech_recognition/__init__.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 77f5d385..63ea2a39 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1393,14 +1393,14 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
def recognize_vosk(self, audio_data, language='en'):
from vosk import Model, KaldiRecognizer
-
- if not os.path.exists("model"):
- return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
- exit (1)
assert isinstance(audio_data, AudioData), "Data must be audio data"
+ assert isinstance(language, AudioData), "Language data"
if not hasattr(self, 'vosk_model'):
+ if not os.path.exists("model"):
+ return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
+ exit (1)
self.vosk_model = Model()
rec = KaldiRecognizer(self.vosk_model, 16000);
From a1a7a14a88bdab3fd7b49fc1b92ca887570369f0 Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Fri, 18 Sep 2020 20:41:07 +0200
Subject: [PATCH 12/13] Update of vosk with help of @nshmyrev
Fix, because i screwed up
---
speech_recognition/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 63ea2a39..095d8e89 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1395,7 +1395,7 @@ def recognize_vosk(self, audio_data, language='en'):
from vosk import Model, KaldiRecognizer
assert isinstance(audio_data, AudioData), "Data must be audio data"
- assert isinstance(language, AudioData), "Language data"
+ assert isinstance(language, str), "Language data"
if not hasattr(self, 'vosk_model'):
if not os.path.exists("model"):
From 274f5eb05bb56ffa591c402b19b6c230af9c5dff Mon Sep 17 00:00:00 2001
From: mytja <52399966+mytja@users.noreply.github.com>
Date: Sat, 19 Sep 2020 10:33:59 +0200
Subject: [PATCH 13/13] Final update with help of @nshmyrev
Finally, the final update
---
speech_recognition/__init__.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 095d8e89..37b17292 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1395,13 +1395,12 @@ def recognize_vosk(self, audio_data, language='en'):
from vosk import Model, KaldiRecognizer
assert isinstance(audio_data, AudioData), "Data must be audio data"
- assert isinstance(language, str), "Language data"
if not hasattr(self, 'vosk_model'):
if not os.path.exists("model"):
return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
exit (1)
- self.vosk_model = Model()
+ self.vosk_model = Model("model")
rec = KaldiRecognizer(self.vosk_model, 16000);