Skip to content

Commit

Permalink
Update deepgram endpointing (#145)
Browse files Browse the repository at this point in the history
* deepgram: Add min_silence_duration to deepgram client.

Deepgram controls VAD through its endpointing parameter; this fix allows min_silence_duration to be configured in the agents layer.

* add utterance_end_ms and speech_final to Deepgram plugin

* add utterance_end_ms and speech_final to Deepgram plugin

* expose speech_final as end_of_speech

---------

Co-authored-by: Lam Nguyen <[email protected]>
  • Loading branch information
seanmuirhead and Vibrat authored Feb 6, 2024
1 parent 27ea3c1 commit 604d7e3
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
1 change: 1 addition & 0 deletions livekit-agents/livekit/agents/stt/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class SpeechData:
class SpeechEvent:
is_final: bool
alternatives: List[SpeechData]
end_of_speech: bool = False


class STT(ABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class STTOptions:
punctuate: bool
model: DeepgramModels
smart_format: bool

endpointing: Optional[str]

class STT(stt.STT):
def __init__(
Expand All @@ -36,6 +36,7 @@ def __init__(
model: DeepgramModels = "nova-2-general",
api_key: Optional[str] = None,
api_url: Optional[str] = None,
min_silence_duration: int = 10,
) -> None:
super().__init__(streaming_supported=True)
api_key = api_key or os.environ.get("DEEPGRAM_API_KEY")
Expand All @@ -51,6 +52,7 @@ def __init__(
punctuate=punctuate,
model=model,
smart_format=smart_format,
endpointing=str(min_silence_duration),
)

def _sanitize_options(
Expand Down Expand Up @@ -197,6 +199,7 @@ async def on_transcript_received(
sample_rate=self._sample_rate,
smart_format=self._config.smart_format,
punctuate=self._config.punctuate,
endpointing=self._config.endpointing,
)
await self._live.start(dg_opts)
opened = True
Expand Down Expand Up @@ -244,6 +247,7 @@ def live_transcription_to_speech_event(

return stt.SpeechEvent(
is_final=event.is_final or False, # could be None?
end_of_speech=event.speech_final or False,
alternatives=[
stt.SpeechData(
language=language or "",
Expand Down

0 comments on commit 604d7e3

Please sign in to comment.