Merge pull request #28 from OpenVoiceOS/release-0.1.0a1

Release 0.1.0a1
OpenVoiceOS · Nov 25, 2024 · 5f4d039 · 5f4d039
2 parents c56c16f + 0b6f1ef
commit 5f4d039
Show file tree

Hide file tree

Showing 6 changed files with 111 additions and 41 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,16 +1,16 @@
 # Changelog
 
-## [0.0.6a1](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/tree/0.0.6a1) (2024-11-06)
+## [0.1.0a1](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/tree/0.1.0a1) (2024-11-25)
 
-[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/compare/V0.0.5...0.0.6a1)
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/compare/V0.0.6...0.1.0a1)
 
 **Merged pull requests:**
 
-- fix: whisper turbo only [\#25](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/pull/25) ([JarbasAl](https://github.com/JarbasAl))
+- feat:lang detect servers [\#27](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/pull/27) ([JarbasAl](https://github.com/JarbasAl))
 
-## [V0.0.5](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/tree/V0.0.5) (2024-09-11)
+## [V0.0.6](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/tree/V0.0.6) (2024-11-06)
 
-[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/compare/0.0.5...V0.0.5)
+[Full Changelog](https://github.com/OpenVoiceOS/ovos-stt-server-plugin/compare/0.0.6...V0.0.6)
 
 
 

diff --git a/README.md b/README.md
@@ -13,11 +13,26 @@ pip install ovos-stt-plugin-server
 ```json
   "stt": {
     "module": "ovos-stt-plugin-server",
-    "ovos-stt-plugin-server": {"url": "https://0.0.0.0:8080/stt"},
-    "verify_ssl": true
+    "ovos-stt-plugin-server": {
+      "url": "https://0.0.0.0:8080/stt",
+      "verify_ssl": true
+    }
  }
 ```
 
+For audio language detection, add the plugin to the listener's audio transformers:
+
+```json
+  "listener": {
+    "audio_transformers": {
+        "ovos-audio-lang-server-plugin": {
+          "url": "https://0.0.0.0:8080/lang_detect",
+          "verify_ssl": true
+        }
+    }
+  }
+```
+
 ### Security warning
 
 Please note that while you can set `verify_ssl` to `false` to disable SSL
@@ -29,27 +44,7 @@ purposes. Consider using a private CA or certificates signed using
 
 public server status page can be found at https://github.com/OpenVoiceOS/status
 
-the default public servers run [Whisper](https://github.com/OpenVoiceOS/ovos-stt-plugin-fasterwhisper), but [Nemo](https://github.com/NeonGeckoCom/neon-stt-plugin-nemo) is also available
-
-- https://stt.smartgic.io/nemo/stt
+the default public servers run [Whisper](https://github.com/OpenVoiceOS/ovos-stt-plugin-fasterwhisper)
 
 While there are associated risks with public servers, we value your trust in our products, learn more in Jarbas blog post [The Trust Factor in Public Servers](https://jarbasal.github.io/blog/2023/10/14/the-trust-factor-in-public-servers.html)
 
-## Docker
-
-see [google-stt-proxy](https://github.com/OpenVoiceOS/ovos-stt-plugin-chromium/pkgs/container/google-stt-proxy) for an example
-
-```dockerfile
-FROM debian:buster-slim
-
-RUN apt-get update && \
-  apt-get install -y git python3 python3-dev python3-pip curl build-essential
-
-RUN pip3 install ovos-stt-http-server==0.0.2a1
-RUN pip3 install SpeechRecognition==3.8.1
-
-COPY . /tmp/ovos-stt-chromium
-RUN pip3 install /tmp/ovos-stt-chromium
-
-ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-chromium
-```tts
diff --git a/ovos_stt_plugin_server/__init__.py b/ovos_stt_plugin_server/__init__.py
@@ -1,10 +1,72 @@
-from typing import Optional, List
+import random
+import time
+from typing import Optional, List, Tuple
 
 import requests
-import random
-from requests.utils import default_user_agent
-from ovos_utils.log import LOG
+from ovos_config import Configuration
 from ovos_plugin_manager.stt import STT
+from ovos_plugin_manager.templates.transformers import AudioLanguageDetector
+from ovos_utils.log import LOG
+from requests.utils import default_user_agent
+from speech_recognition import AudioData
+
+
+class OVOSServerLangClassifier(AudioLanguageDetector):
+    def __init__(self, config=None):
+        super().__init__("ovos-audio-lang-server-plugin", 10, config)
+
+    @property
+    def verify_ssl(self) -> bool:
+        return self.config.get("verify_ssl", True)
+
+    @property
+    def user_agent(self) -> str:
+        return self.config.get("user_agent") or default_user_agent()
+
+    @property
+    def urls(self) -> Optional[List[str]]:
+        urls = self.config.get("urls") or []
+        if urls and not isinstance(urls, list):
+            urls = [urls]
+        return urls
+
+    @property
+    def public_servers(self):
+        return ["https://stt.smartgic.io/fasterwhisper/lang_detect"]
+
+    def detect(self, audio_data: bytes, valid_langs=None) -> Tuple[str, float]:
+        valid_langs = valid_langs or self.valid_langs
+        if len(valid_langs) == 1:
+            return valid_langs[0], 1.0
+        if isinstance(audio_data, AudioData):
+            audio_data = audio_data.get_wav_data()
+        if self.urls:
+            LOG.debug(f"Using user defined urls {self.urls}")
+            urls = self.urls
+        else:
+            LOG.debug(f"Using public servers {self.public_servers}")
+            urls = self.public_servers
+            random.shuffle(urls)
+
+        for url in urls:
+            LOG.debug(f"chosen url {url}")
+            try:
+                response = requests.post(url, data=audio_data,
+                                         headers={"Content-Type": "audio/wav",
+                                                  "User-Agent": self.user_agent},
+                                         params={"valid_langs": ",".join(valid_langs)},
+                                         timeout=self.config.get("timeout", 5),
+                                         verify=self.verify_ssl)
+                if not response.ok:
+                    LOG.error(f"{response.status_code} response from {url}: "
+                              f"{response.content}")
+                else:
+                    data = response.json()
+                    return data["lang"], data["conf"]
+            except Exception as e:
+                LOG.exception(e)
+            LOG.error(f"Lang detect request to {url} failed")
+        return Configuration().get("lang"), 0.0
 
 
 class OVOSHTTPServerSTT(STT):
@@ -15,6 +77,7 @@ def __init__(self, config=None):
         if not self.verify_ssl:
             LOG.warning("SSL verification disabled, this is not secure and should"
                         "only be used for test systems! Please set up a valid certificate!")
+        self._detector = OVOSServerLangClassifier()
 
     @property
     def verify_ssl(self) -> bool:
@@ -54,6 +117,7 @@ def execute(self, audio, language=None):
                                          headers={"Content-Type": "audio/wav",
                                                   "User-Agent": self.user_agent},
                                          params={"lang": language or self.lang},
+                                         timeout=self.config.get("timeout", 5),
                                          verify=self.verify_ssl)
                 if not response.ok:
                     LOG.error(f"{response.status_code} response from {url}: "
@@ -171,11 +235,22 @@ def execute(self, audio, language=None):
     from speech_recognition import Recognizer, AudioFile
 
     engine = OVOSHTTPServerSTT()
+    d = OVOSServerLangClassifier()
 
     # inference
     jfk = "/home/miro/PycharmProjects/ovos-stt-plugin-fasterwhisper/jfk.wav"
+    ca = "/home/miro/PycharmProjects/ovos-stt-plugin-vosk/example.wav"
     with AudioFile(jfk) as source:
         audio = Recognizer().record(source)
 
-    pred = engine.execute(audio)
+    s = time.monotonic()
+    pred = d.detect(audio, valid_langs=["en", "es", "ca"])
+    e = time.monotonic() - s
+    print(pred)
+    print(f"took {e} seconds")
+
+    s = time.monotonic()
+    pred = engine.execute(audio, language="ca")
+    e = time.monotonic() - s
     print(pred)
+    print(f"took {e} seconds")
diff --git a/ovos_stt_plugin_server/version.py b/ovos_stt_plugin_server/version.py
@@ -1,6 +1,6 @@
 # START_VERSION_BLOCK
 VERSION_MAJOR = 0
-VERSION_MINOR = 0
-VERSION_BUILD = 6
-VERSION_ALPHA = 0
+VERSION_MINOR = 1
+VERSION_BUILD = 0
+VERSION_ALPHA = 1
 # END_VERSION_BLOCK
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -1 +1 @@
-ovos-plugin-manager>=0.0.5,<1.0.0
+ovos-plugin-manager>=0.7.0,<1.0.0
diff --git a/setup.py b/setup.py
@@ -4,10 +4,9 @@
 
 
 BASEDIR = os.path.abspath(os.path.dirname(__file__))
-PLUGIN_ENTRY_POINT = (
-    'ovos-stt-plugin-server = ovos_stt_plugin_server:OVOSHTTPServerSTT'
-)
+PLUGIN_ENTRY_POINT = 'ovos-stt-plugin-server = ovos_stt_plugin_server:OVOSHTTPServerSTT'
 CONFIG_ENTRY_POINT = 'ovos-stt-plugin-server.config = ovos_stt_plugin_server:OVOSHTTPServerSTTConfig'
+LANG_PLUGIN_ENTRY_POINT = 'ovos-audio-lang-server-plugin = ovos_stt_plugin_server:OVOSServerLangClassifier'
 
 
 def get_version():
@@ -85,5 +84,6 @@ def required(requirements_file):
     ],
     keywords='mycroft OpenVoiceOS OVOS plugin stt',
     entry_points={'mycroft.plugin.stt': PLUGIN_ENTRY_POINT,
-                  'mycroft.plugin.stt.config': CONFIG_ENTRY_POINT}
+                  'mycroft.plugin.stt.config': CONFIG_ENTRY_POINT,
+                  'neon.plugin.audio': LANG_PLUGIN_ENTRY_POINT}
 )
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-ovos-plugin-manager>=0.0.5,<1.0.0
		+ovos-plugin-manager>=0.7.0,<1.0.0