Skip to content

Commit

Permalink
fix/ensure_min_audio_len
Browse files: browse the repository at this point in the history
fixes #110
  • Loading branch information
JarbasAl committed Jun 2, 2024
1 parent 1e5aa2e commit db2be26
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions ovos_dinkum_listener/voice_loop/voice_loop.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -693,15 +693,24 @@ def _after_cmd(self, chunk: bytes):
"""
# Command has ended, call transformers pipeline before STT
chunk, stt_context = self.transformers.transform(chunk)

if isinstance(self.stt, FakeStreamingSTT) and self.remove_silence:
# NOTE: This is using the FS-STT buffer directly, not the S-STT queue
self.stt.stream.buffer.clear()
extracted_speech = self.vad.extract_speech(self.stt_audio_bytes)
# only deposit non empty audio
if extracted_speech:
LOG.debug("removed silence from utterance")
self.stt.stream.update(extracted_speech)
n_chunks = len(self.stt_audio_bytes) / self.mic.chunk_size
seconds = n_chunks * self.mic.seconds_per_chunk
if seconds > 1:
extracted_speech = self.vad.extract_speech(self.stt_audio_bytes)
n_chunks = len(extracted_speech) / self.mic.chunk_size
seconds = n_chunks * self.mic.seconds_per_chunk
if extracted_speech and seconds >= 1:
self.stt.stream.buffer.clear()
LOG.info("removed silence from utterance recording")
# replace the stt buffer with cropped audio
self.stt.stream.update(extracted_speech)
else:
LOG.warning("trimmed audio appears to be all silence! "
"skipping VAD silence removal")
else:
LOG.info("recorded audio <= 1 second, skipping silence removal")

text, stt_context = self._get_tx(stt_context)

Expand Down

0 comments on commit db2be26

Please sign in to comment.