diff --git a/README.rst b/README.rst index 0d0322b9..4b224960 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,8 @@ Speech recognition engine/API support: * `Houndify API `__ * `IBM Speech to Text `__ * `Snowboy Hotword Detection `__ (works offline) +* `Tensorflow `__ +* `Vosk API `__ (works offline) **Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details. @@ -52,6 +54,8 @@ The `library reference `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``. +You have to install Vosk models to use Vosk. `Here `__ are the models available. You have to place them in the models folder of your project, like "your-project-folder/models/your-vosk-model" + Examples -------- @@ -86,6 +90,7 @@ To use all of the functionality of the library, you should have: * **PocketSphinx** (required only if you need to use the Sphinx recognizer, ``recognizer_instance.recognize_sphinx``) * **Google API Client Library for Python** (required only if you need to use the Google Cloud Speech API, ``recognizer_instance.recognize_google_cloud``) * **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X) +* **Vosk** (required only if you need to use Vosk API speech recognition, ``recognizer_instance.recognize_vosk``) The following requirements are optional, but can improve or extend functionality in some situations: @@ -129,6 +134,16 @@ Note that the versions available in most package repositories are outdated and w See `Notes on using PocketSphinx `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``. +Vosk (for Vosk users) +~~~~~~~~~~~~~~~~~~~~~ +Vosk API is **required if and only if you want to use the Vosk recognizer** (``recognizer_instance.recognize_vosk``). 
+
+You can install it with ``python3 -m pip install vosk``.
+
+You also have to install Vosk Models:
+
+`Here `__ are the models available for download. You have to place them in the models folder of your project, like "your-project-folder/models/your-vosk-model"
+
 Google Cloud Speech Library for Python (for Google Cloud Speech API users)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
index 8eaabf94..37b17292 100644
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -1390,7 +1390,27 @@ def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_ac
         for node_id in top_k:
             human_string = self.tflabels[node_id]
             return human_string
-
+
+    def recognize_vosk(self, audio_data, language='en'):
+        """Perform speech recognition on ``audio_data`` using a local Vosk model.
+
+        The model is loaded from the ``model`` folder in the current working
+        directory and cached on the recognizer instance, so the expensive load
+        happens only once.  Audio is converted to 16 kHz, 16-bit samples before
+        recognition.  Returns Vosk's final result (a JSON string, per the Vosk
+        API).  The ``language`` parameter is currently ignored; the language is
+        determined by the installed model.
+        """
+        from vosk import Model, KaldiRecognizer
+
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
+
+        if not hasattr(self, 'vosk_model'):
+            if not os.path.exists("model"):
+                return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder."
+            self.vosk_model = Model("model")
+
+        rec = KaldiRecognizer(self.vosk_model, 16000)
+        rec.AcceptWaveform(audio_data.get_raw_data(convert_rate=16000, convert_width=2))
+        return rec.FinalResult()
 
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""