From 9c9cd8b4fb89e199c7f3220bcdd28c435b376243 Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sun, 28 Jul 2024 22:49:39 -0700 Subject: [PATCH 1/3] Added a few docstrings and corrected typos --- .../agents/voice_assistant/voice_assistant.py | 16 ++++++++-------- livekit-agents/livekit/agents/worker.py | 14 ++++++++++++++ .../livekit/plugins/silero/vad.py | 5 +++-- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py b/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py index 324f20505..c637ddcc1 100644 --- a/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py +++ b/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py @@ -100,8 +100,8 @@ class AssistantTranscriptionOptions: """The speed at which the agent's speech transcription is forwarded to the client. We try to mimic the agent's speech speed by adjusting the transcription speed.""" sentence_tokenizer: tokenize.SentenceTokenizer = tokenize.basic.SentenceTokenizer() - """The tokenizer used to split the speech into sentences. - This is used to device when to mark a transcript as final for the agent transcription.""" + """The tokenizer used to split the speech into sentences. + This is used to decide when to mark a transcript as final for the agent transcription.""" word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer() """The tokenizer used to split the speech into words. This is used to simulate the "interim results" of the agent transcription.""" @@ -512,18 +512,18 @@ async def _play_speech(self, speech_info: _SpeechInfo) -> None: return user_question = speech_info.user_question - user_speech_commited = False + user_speech_committed = False play_handle = synthesis_handle.play() join_fut = play_handle.join() def _commit_user_message_if_needed() -> None: - nonlocal user_speech_commited + nonlocal user_speech_committed if ( not user_question or synthesis_handle.interrupted - or user_speech_commited + or user_speech_committed ): return @@ -545,7 +545,7 @@ def _commit_user_message_if_needed() -> None: self.emit("user_speech_committed", user_msg) self._transcribed_text = self._transcribed_text[len(user_question) :] - user_speech_commited = True + user_speech_committed = True # wait for the play_handle to finish and check every 1s if the user question should be committed while not join_fut.done(): @@ -570,7 +570,7 @@ def _commit_user_message_if_needed() -> None: if is_using_tools and not interrupted: assert isinstance(speech_info.source, LLMStream) assert ( - user_speech_commited + user_speech_committed ), "user speech should be committed before using tools" # execute functions @@ -616,7 +616,7 @@ def _commit_user_message_if_needed() -> None: collected_text = answer_synthesis.collected_text interrupted = answer_synthesis.interrupted - if speech_info.add_to_chat_ctx and (not user_question or user_speech_commited): + if speech_info.add_to_chat_ctx and (not user_question or user_speech_committed): self._chat_ctx.messages.extend(extra_tools_messages) msg = ChatMessage.create(text=collected_text, role="assistant") diff --git a/livekit-agents/livekit/agents/worker.py b/livekit-agents/livekit/agents/worker.py index eb7d147ec..4d63ba403 100644 --- a/livekit-agents/livekit/agents/worker.py +++ b/livekit-agents/livekit/agents/worker.py @@ -68,21 +68,35 @@ class WorkerPermissions: @dataclass class WorkerOptions: entrypoint_fnc: Callable[[JobContext], Coroutine] + """Entrypoint function that will be called when a job is assigned to this worker.""" request_fnc: Callable[[JobRequest], Coroutine] = _default_request_fnc + """Inspect the request and decide if the current worker should handle it. + + When left empty, all jobs are accepted.""" prewarm_fnc: Callable[[JobProcess], Any] = _default_initialize_process_fnc + """A function to perform any necessary initialization before the job starts.""" load_fnc: Callable[[], float] = _default_cpu_load_fnc + """Called to determine the current load of the worker. Should return a value between 0 and 1.""" load_threshold: float = 0.8 + """When the load exceeds this threshold, the worker will be marked as unavailable.""" num_idle_processes: int = 3 + """Number of idle processes to keep warm.""" shutdown_process_timeout: float = 60.0 initialize_process_timeout: float = 10.0 permissions: WorkerPermissions = field(default_factory=WorkerPermissions) + """Permissions that the agent should join the room with.""" worker_type: agent.JobType = agent.JobType.JT_ROOM + """Whether to spin up an agent for each room or publisher.""" max_retry: int = MAX_RECONNECT_ATTEMPTS ws_url: str = "ws://localhost:7880" + """URL to connect to the LiveKit server. Uses LIVEKIT_URL from environment""" api_key: str | None = None + """API key to authenticate with LiveKit. Uses LIVEKIT_API_KEY from environment""" api_secret: str | None = None + """API secret to authenticate with LiveKit. Uses LIVEKIT_API_SECRET from environment""" host: str = "" # default to all interfaces port: int = 8081 + """Port for local HTTP server to listen on. """ EventTypes = Literal["worker_registered"] diff --git a/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py b/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py index 283ad4926..f6a3f2bac 100644 --- a/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py +++ b/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py @@ -51,8 +51,9 @@ def load( force_cpu: bool = True, ) -> "VAD": """ - Initialize the Silero VAD with the given options. - The options are already set to strong defaults. + Initialize the Silero VAD. + + When options are not provided, sane defaults are used. Args: min_speech_duration: minimum duration of speech to start a new speech chunk From c417d872fca15cd6d20a62fd9875418ab79ac2ad Mon Sep 17 00:00:00 2001 From: David Zhao Date: Fri, 2 Aug 2024 15:56:10 -0700 Subject: [PATCH 2/3] Update livekit-agents/livekit/agents/worker.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Théo Monnom --- livekit-agents/livekit/agents/worker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/livekit-agents/livekit/agents/worker.py b/livekit-agents/livekit/agents/worker.py index 4d63ba403..d0f1d0ded 100644 --- a/livekit-agents/livekit/agents/worker.py +++ b/livekit-agents/livekit/agents/worker.py @@ -82,7 +82,9 @@ class WorkerOptions: num_idle_processes: int = 3 """Number of idle processes to keep warm.""" shutdown_process_timeout: float = 60.0 + """Maximum amount of time to wait for a job to shut down gracefully""" initialize_process_timeout: float = 10.0 + """Maximum amount of time to wait for a process to initialize/prewarm""" permissions: WorkerPermissions = field(default_factory=WorkerPermissions) """Permissions that the agent should join the room with.""" worker_type: agent.JobType = agent.JobType.JT_ROOM From bb3f9ac65a0a17b257c8dbdc9cace2dee8f73cc6 Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sat, 3 Aug 2024 14:33:01 -0700 Subject: [PATCH 3/3] formatting --- livekit-agents/livekit/agents/worker.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/livekit-agents/livekit/agents/worker.py b/livekit-agents/livekit/agents/worker.py index d0f1d0ded..9c6c76173 100644 --- a/livekit-agents/livekit/agents/worker.py +++ b/livekit-agents/livekit/agents/worker.py @@ -91,14 +91,22 @@ class WorkerOptions: """Whether to spin up an agent for each room or publisher.""" max_retry: int = MAX_RECONNECT_ATTEMPTS ws_url: str = "ws://localhost:7880" - """URL to connect to the LiveKit server. Uses LIVEKIT_URL from environment""" + """URL to connect to the LiveKit server. + + By default it uses ``LIVEKIT_URL`` from environment""" api_key: str | None = None - """API key to authenticate with LiveKit. Uses LIVEKIT_API_KEY from environment""" + """API key to authenticate with LiveKit. + + By default it uses ``LIVEKIT_API_KEY`` from environment""" api_secret: str | None = None - """API secret to authenticate with LiveKit. Uses LIVEKIT_API_SECRET from environment""" + """API secret to authenticate with LiveKit. + + By default it uses ``LIVEKIT_API_SECRET`` from environment""" host: str = "" # default to all interfaces port: int = 8081 - """Port for local HTTP server to listen on. """ + """Port for local HTTP server to listen on. + + The HTTP server is used as a health check endpoint.""" EventTypes = Literal["worker_registered"]