Skip to content

Commit

Permalink
Refactor to extend changes in OvosDinkumListener (#203)
Browse files Browse the repository at this point in the history
# Description
Update to use `OVOSSTTFactory` directly
Mark neon_speech.stt module as deprecated
Update dependencies to latest stable versions
Includes a patch to handle config updates when the FileWatcher fails to
report changes

# Issues
Closes #168 
Relates to #158 

# Other Notes
Validated k8s alpha deployment
Tested local Docker instance
Tested against Mark2 latest beta

---------

Co-authored-by: Daniel McKnight <[email protected]>
  • Loading branch information
NeonDaniel and NeonDaniel authored Sep 20, 2024
1 parent 61e0b30 commit 5b274a9
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 43 deletions.
39 changes: 25 additions & 14 deletions neon_speech/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
from typing import Dict
from typing import Dict, List, Tuple

import ovos_dinkum_listener.plugins

Expand All @@ -48,9 +48,7 @@
from ovos_dinkum_listener.service import OVOSDinkumVoiceService
from ovos_dinkum_listener.voice_loop.voice_loop import ListeningMode

from neon_speech.stt import STTFactory

ovos_dinkum_listener.plugins.OVOSSTTFactory = STTFactory
from ovos_plugin_manager.stt import OVOSSTTFactory as STTFactory

_SERVICE_READY = Event()

Expand Down Expand Up @@ -131,8 +129,7 @@ def __init__(self, ready_hook=on_ready, error_hook=on_error,
self.lock = Lock()
self._stop_service = Event()
if self.config.get('listener', {}).get('enable_stt_api', True):
self.api_stt = STTFactory.create(config=self.config,
results_event=None)
self.api_stt = STTFactory.create(config=self.config)
else:
LOG.info("Skipping api_stt init")
self.api_stt = None
Expand Down Expand Up @@ -223,6 +220,16 @@ def register_event_handlers(self):
self.bus.on("neon.enable_wake_word", self.handle_enable_wake_word)
self.bus.on("neon.disable_wake_word", self.handle_disable_wake_word)

# TODO: Patching config reload behavior
self.bus.on("configuration.patch", self._patch_handle_config_reload)

def _patch_handle_config_reload(self, _: Message):
# This patches observed behavior where the filewatcher fails to trigger.
# Configuration reload is idempotent, so calling it again will have
# minimal impact
self.config.reload()
self.reload_configuration()

def _handle_get_languages_stt(self, message):
if self.config.get('listener', {}).get('enable_voice_loop', True):
return OVOSDinkumVoiceService._handle_get_languages_stt(self,
Expand Down Expand Up @@ -414,9 +421,11 @@ def handle_get_stt(self, message: Message):
message.context['timing']['client_to_core'] = \
received_time - sent_time
message.context['timing']['response_sent'] = time()
transcribed_str = [t[0] for t in transcriptions]
self.bus.emit(message.reply(ident,
data={"parser_data": parser_data,
"transcripts": transcriptions}))
"transcripts": transcribed_str,
"transcripts_with_conf": transcriptions}))
except Exception as e:
LOG.error(e)
message.context['timing']['response_sent'] = time()
Expand Down Expand Up @@ -467,8 +476,9 @@ def build_context(msg: Message):
message.context.setdefault('timing', dict())
message.context['timing'] = {**timing, **message.context['timing']}
context = build_context(message)
transribed_str = [t[0] for t in transcriptions]
data = {
"utterances": transcriptions,
"utterances": transribed_str,
"lang": message.data.get("lang", "en-us")
}
# Send a new message to the skills module with proper routing ctx
Expand All @@ -478,7 +488,8 @@ def build_context(msg: Message):
# Reply to original message with transcription/audio parser data
self.bus.emit(message.reply(ident,
data={"parser_data": parser_data,
"transcripts": transcriptions,
"transcripts": transribed_str,
"transcripts_with_conf": transcriptions,
"skills_recv": handled}))
except Exception as e:
LOG.error(e)
Expand Down Expand Up @@ -528,7 +539,7 @@ def _write_encoded_file(audio_data: str) -> str:
return wav_file_path

def _get_stt_from_file(self, wav_file: str,
lang: str = None) -> (AudioData, dict, list):
lang: str = None) -> (AudioData, dict, List[Tuple[str, float]]):
"""
Performs STT and audio processing on the specified wav_file
:param wav_file: wav audio file to process
Expand Down Expand Up @@ -562,18 +573,18 @@ def _get_stt_from_file(self, wav_file: str,
self.api_stt.stream_data(data)
except EOFError:
break
transcriptions = self.api_stt.stream_stop()
transcriptions = self.api_stt.transcribe(None, None)
self.lock.release()
else:
LOG.error(f"Timed out acquiring lock, not processing: {wav_file}")
transcriptions = []
else:
transcriptions = self.api_stt.execute(audio_data, lang)
transcriptions = self.api_stt.transcribe(audio_data, lang)
if isinstance(transcriptions, str):
LOG.warning("Transcriptions is a str, no alternatives provided")
LOG.error("Transcriptions is a str, no alternatives provided")
transcriptions = [transcriptions]

transcriptions = [clean_quotes(t) for t in transcriptions]
transcriptions = [(clean_quotes(t[0]), t[1]) for t in transcriptions]

get_stt = float(_stopwatch.time)
with _stopwatch:
Expand Down
12 changes: 11 additions & 1 deletion neon_speech/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,21 @@
from inspect import signature
from threading import Event

from neon_utils import LOG
from ovos_utils.log import LOG, log_deprecation
from ovos_plugin_manager.stt import OVOSSTTFactory, get_stt_config
from ovos_plugin_manager.templates.stt import StreamingSTT

from ovos_config.config import Configuration

log_deprecation("This module is deprecated. Import from `ovos_plugin_manager`",
"5.0.0")


class WrappedSTT(StreamingSTT, ABC):
def __new__(cls, base_engine, *args, **kwargs):
log_deprecation("This class is deprecated. Use "
"`ovos_plugin_manager.templates.stt.StreamingSTT",
"5.0.0")
results_event = kwargs.get("results_event") or Event()
# build STT
for k in list(kwargs.keys()):
Expand Down Expand Up @@ -66,6 +72,10 @@ def stream_stop(self):


class STTFactory(OVOSSTTFactory):
log_deprecation("This class is deprecated. Use "
"`ovos_plugin_manager.stt.OVOSSTTFactory",
"5.0.0")

@staticmethod
def create(config=None, results_event: Event = None):
get_stt_config(config)
Expand Down
1 change: 1 addition & 0 deletions neon_speech/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def init_stt_plugin(plugin: str):
LOG.warning(f"Could not find plugin: {plugin}")


@deprecated("Platform detection has been deprecated", "5.0.0")
def use_neon_speech(func):
"""
Wrapper to ensure call originates from neon_speech for stack checks.
Expand Down
2 changes: 1 addition & 1 deletion requirements/docker.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ovos-stt-plugin-vosk~=0.1
neon-stt-plugin-nemo~=0.0.2
neon-stt-plugin-nemo~=0.0.2,>=0.0.5a5
onnxruntime!=1.16.0 # TODO: Patching https://github.com/microsoft/onnxruntime/issues/17631

# Load alternative WW plugins so they are available
Expand Down
8 changes: 4 additions & 4 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
ovos-dinkum-listener~=0.0
ovos-dinkum-listener~=0.2
ovos-bus-client~=0.0,>=0.0.3
ovos-utils~=0.0,>=0.0.30
ovos-plugin-manager~=0.0,>=0.0.23
ovos-plugin-manager~=0.0,>=0.0.26a39
click~=8.0
click-default-group~=1.2
neon-utils[network,audio]~=1.9
neon-utils[network,audio]~=1.9,>=1.11.1a3
ovos-config~=0.0,>=0.0.7

ovos-vad-plugin-webrtcvad~=0.0.1
ovos-ww-plugin-vosk~=0.1
ovos-microphone-plugin-alsa~=0.0.0
ovos-microphone-plugin-alsa~=0.1
3 changes: 1 addition & 2 deletions requirements/test_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
neon-stt-plugin-deepspeech_stream_local~=2.0
neon-stt-plugin-nemo~=0.0,>=0.0.2
neon-stt-plugin-nemo~=0.0,>=0.0.5a4
ovos-stt-plugin-vosk~=0.1
ovos-stt-plugin-server~=0.0.3
pytest
Expand Down
6 changes: 3 additions & 3 deletions tests/api_method_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def setUpClass(cls) -> None:
use_neon_speech(init_config_dir)()

test_config = dict(Configuration())
test_config["stt"]["module"] = "deepspeech_stream_local"
test_config["stt"]["module"] = "neon-stt-plugin-nemo"
test_config["listener"]["VAD"]["module"] = "dummy"
assert test_config["stt"]["module"] == "deepspeech_stream_local"
assert test_config["stt"]["module"] == "neon-stt-plugin-nemo"

ready_event = Event()

Expand All @@ -77,7 +77,7 @@ def _ready():
cls.speech_service = NeonSpeechClient(speech_config=test_config,
daemonic=False, bus=cls.bus,
ready_hook=_ready)
assert cls.speech_service.config["stt"]["module"] == "deepspeech_stream_local"
assert cls.speech_service.config["stt"]["module"] == "neon-stt-plugin-nemo"
cls.speech_service.start()

if not ready_event.wait(120):
Expand Down
41 changes: 23 additions & 18 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@
import shutil
import sys
import unittest
import yaml

from os.path import dirname, join
from threading import Thread, Event
from unittest.mock import Mock, patch
from unittest import skip
from unittest.mock import patch
from click.testing import CliRunner

from ovos_bus_client import Message
Expand All @@ -44,7 +46,8 @@

CONFIG_PATH = os.path.join(dirname(__file__), "config")
os.environ["XDG_CONFIG_HOME"] = CONFIG_PATH

os.environ["OVOS_CONFIG_BASE_FOLDER"] = "neon"
os.environ["OVOS_CONFIG_FILENAME"] = "neon.yaml"

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

Expand Down Expand Up @@ -80,12 +83,13 @@ def test_install_stt_plugin(self):
"ovos-stt-plugin-vosk"))
import ovos_stt_plugin_vosk

@skip("Configuration patching is deprecated")
def test_patch_config(self):
from neon_speech.utils import use_neon_speech
from neon_utils.configuration_utils import init_config_dir
test_config_dir = os.path.join(os.path.dirname(__file__), "config")
os.makedirs(test_config_dir, exist_ok=True)
os.environ["XDG_CONFIG_HOME"] = test_config_dir

use_neon_speech(init_config_dir)()

with open(join(test_config_dir, "OpenVoiceOS", 'ovos.conf')) as f:
Expand Down Expand Up @@ -117,7 +121,7 @@ def test_get_stt_from_file(self):
AUDIO_FILE_PATH = os.path.join(os.path.dirname(
os.path.realpath(__file__)), "audio_files")
TEST_CONFIG = use_neon_speech(Configuration)()
TEST_CONFIG["stt"]["module"] = "deepspeech_stream_local"
TEST_CONFIG["stt"]["module"] = "neon-stt-plugin-nemo"
bus = FakeBus()
bus.connected_event = Event()
bus.connected_event.set()
Expand All @@ -129,15 +133,17 @@ def test_get_stt_from_file(self):
self.assertIsInstance(audio, AudioData)
self.assertIsInstance(context, dict)
self.assertIsInstance(transcripts, list)
self.assertIn("stop", transcripts)
tr_str = [t[0] for t in transcripts]
self.assertIn("stop", tr_str)

def threaded_get_stt():
audio, context, transcripts = \
client._get_stt_from_file(join(AUDIO_FILE_PATH, "stop.wav"))
self.assertIsInstance(audio, AudioData)
self.assertIsInstance(context, dict)
self.assertIsInstance(transcripts, list)
self.assertIn("stop", transcripts)
tr_str = [t[0] for t in transcripts]
self.assertIn("stop", tr_str)

threads = list()
for i in range(0, 12):
Expand All @@ -156,7 +162,7 @@ def test_ovos_plugin_compat(self):
ovos_vosk_streaming = STTFactory().create(
{'module': 'ovos-stt-plugin-vosk-streaming',
'lang': 'en-us'})
self.assertIsInstance(ovos_vosk_streaming.results_event, Event)
# self.assertIsInstance(ovos_vosk_streaming.results_event, Event)
test_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
"audio_files", "stop.wav")
from neon_utils.file_utils import get_audio_file_stream
Expand Down Expand Up @@ -225,24 +231,23 @@ def on_ready(cls):

@classmethod
def setUpClass(cls):
from ovos_config.config import update_mycroft_config
from neon_utils.configuration_utils import init_config_dir
init_config_dir()
os.makedirs(join(CONFIG_PATH, "neon"), exist_ok=True)
test_config = join(CONFIG_PATH, "neon", "neon.yaml")
with open(test_config, 'w+') as f:
yaml.dump({"hotwords": cls.hotwords_config,
"stt": {"module": "neon-stt-plugin-nemo"},
"VAD": {"module": "dummy"}}, f)

update_mycroft_config({"hotwords": cls.hotwords_config,
"stt": {"module": "neon-stt-plugin-nemo"},
"VAD": {"module": "dummy"}})
import importlib
import ovos_config.config
importlib.reload(ovos_config.config)
# from ovos_config.config import Configuration
# assert Configuration.xdg_configs[0]['hotwords'] == hotwords_config
from ovos_config.config import Configuration
assert Configuration.xdg_configs[0]['hotwords'] == cls.hotwords_config

from neon_speech.utils import use_neon_speech
use_neon_speech(init_config_dir)()
from neon_speech.service import NeonSpeechClient
cls.service = NeonSpeechClient(bus=cls.bus, ready_hook=cls.on_ready)
# assert Configuration() == service.loop.config_core

assert cls.service.reload_configuration in Configuration._callbacks

def _mocked_run():
stopping_event = Event()
Expand Down

0 comments on commit 5b274a9

Please sign in to comment.