From 24238415a8ad21a46a7362f2971e91e486499346 Mon Sep 17 00:00:00 2001 From: David Zhao Date: Sat, 3 Aug 2024 14:41:35 -0700 Subject: [PATCH] Added a few docstrings and corrected typos (#542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Théo Monnom --- .../agents/voice_assistant/voice_assistant.py | 16 ++++++------- livekit-agents/livekit/agents/worker.py | 24 +++++++++++++++++++ .../livekit/plugins/silero/vad.py | 5 ++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py b/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py index ef54224fe..d7ef2f8b8 100644 --- a/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py +++ b/livekit-agents/livekit/agents/voice_assistant/voice_assistant.py @@ -104,8 +104,8 @@ class AssistantTranscriptionOptions: """The speed at which the agent's speech transcription is forwarded to the client. We try to mimic the agent's speech speed by adjusting the transcription speed.""" sentence_tokenizer: tokenize.SentenceTokenizer = tokenize.basic.SentenceTokenizer() - """The tokenizer used to split the speech into sentences. - This is used to device when to mark a transcript as final for the agent transcription.""" + """The tokenizer used to split the speech into sentences. + This is used to decide when to mark a transcript as final for the agent transcription.""" word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer() """The tokenizer used to split the speech into words. This is used to simulate the "interim results" of the agent transcription.""" @@ -509,18 +509,18 @@ async def _play_speech(self, speech_info: _SpeechInfo) -> None: return user_question = speech_info.user_question - user_speech_commited = False + user_speech_committed = False play_handle = synthesis_handle.play() join_fut = play_handle.join() def _commit_user_question_if_needed() -> None: - nonlocal user_speech_commited + nonlocal user_speech_committed if ( not user_question or synthesis_handle.interrupted - or user_speech_commited + or user_speech_committed ): return @@ -549,7 +549,7 @@ def _commit_user_question_if_needed() -> None: self.emit("user_speech_committed", user_msg) self._transcribed_text = self._transcribed_text[len(user_question) :] - user_speech_commited = True + user_speech_committed = True # wait for the play_handle to finish and check every 1s if the user question should be committed _commit_user_question_if_needed() @@ -576,7 +576,7 @@ def _commit_user_question_if_needed() -> None: if is_using_tools and not interrupted: assert isinstance(speech_info.source, LLMStream) assert ( - user_speech_commited + user_speech_committed ), "user speech should have been committed before using tools" # execute functions @@ -638,7 +638,7 @@ def _commit_user_question_if_needed() -> None: collected_text = answer_synthesis.collected_text interrupted = answer_synthesis.interrupted - if speech_info.add_to_chat_ctx and (not user_question or user_speech_commited): + if speech_info.add_to_chat_ctx and (not user_question or user_speech_committed): self._chat_ctx.messages.extend(extra_tools_messages) msg = ChatMessage.create(text=collected_text, role="assistant") diff --git a/livekit-agents/livekit/agents/worker.py b/livekit-agents/livekit/agents/worker.py index 40ead5e30..9f558d85c 100644 --- a/livekit-agents/livekit/agents/worker.py +++ b/livekit-agents/livekit/agents/worker.py @@ -68,21 +68,45 @@ class WorkerPermissions: @dataclass class WorkerOptions: entrypoint_fnc: Callable[[JobContext], Coroutine] + """Entrypoint function that will be called when a job is assigned to this worker.""" request_fnc: Callable[[JobRequest], Coroutine] = _default_request_fnc + """Inspect the request and decide if the current worker should handle it. + + When left empty, all jobs are accepted.""" prewarm_fnc: Callable[[JobProcess], Any] = _default_initialize_process_fnc + """A function to perform any necessary initialization before the job starts.""" load_fnc: Callable[[], float] = _default_cpu_load_fnc + """Called to determine the current load of the worker. Should return a value between 0 and 1.""" load_threshold: float = 0.65 + """When the load exceeds this threshold, the worker will be marked as unavailable.""" num_idle_processes: int = 3 + """Number of idle processes to keep warm.""" shutdown_process_timeout: float = 60.0 + """Maximum amount of time to wait for a job to shut down gracefully""" initialize_process_timeout: float = 10.0 + """Maximum amount of time to wait for a process to initialize/prewarm""" permissions: WorkerPermissions = field(default_factory=WorkerPermissions) + """Permissions that the agent should join the room with.""" worker_type: agent.JobType = agent.JobType.JT_ROOM + """Whether to spin up an agent for each room or publisher.""" max_retry: int = MAX_RECONNECT_ATTEMPTS ws_url: str = "ws://localhost:7880" + """URL to connect to the LiveKit server. + + By default it uses ``LIVEKIT_URL`` from environment""" api_key: str | None = None + """API key to authenticate with LiveKit. + + By default it uses ``LIVEKIT_API_KEY`` from environment""" api_secret: str | None = None + """API secret to authenticate with LiveKit. + + By default it uses ``LIVEKIT_API_SECRET`` from environment""" host: str = "" # default to all interfaces port: int = 8081 + """Port for local HTTP server to listen on. + + The HTTP server is used as a health check endpoint.""" EventTypes = Literal["worker_registered"] diff --git a/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py b/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py index b228f02cf..2cd23c9da 100644 --- a/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py +++ b/livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py @@ -52,8 +52,9 @@ def load( force_cpu: bool = True, ) -> "VAD": """ - Initialize the Silero VAD with the given options. - The options are already set to strong defaults. + Initialize the Silero VAD. + + When options are not provided, sane defaults are used. Args: min_speech_duration: minimum duration of speech to start a new speech chunk