google-tts: ignore wav header (#703)

livekit · Sep 4, 2024 · 3fd86b3
1 parent 38999bd
commit 3fd86b3
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 2 deletions.
diff --git a/.changeset/early-guests-join.md b/.changeset/early-guests-join.md
@@ -0,0 +1,5 @@
+---
+"livekit-plugins-google": patch
+---
+
+google-tts: ignore wav header
diff --git a/examples/text-to-speech/elevenlabs_tts.py b/examples/text-to-speech/elevenlabs_tts.py
@@ -2,13 +2,16 @@
 import logging
 from typing import Optional
 
+from dotenv import load_dotenv
 from livekit import rtc
 from livekit.agents import JobContext, WorkerOptions, cli
 from livekit.plugins import elevenlabs
 
 logger = logging.getLogger("elevenlabs-tts-demo")
 logger.setLevel(logging.INFO)
 
+load_dotenv()
+
 
 def _text_to_chunks(text: str) -> list[str]:
     """Split the text into chunks of 2, 3, and 4 words"""

diff --git a/examples/text-to-speech/openai_tts.py b/examples/text-to-speech/openai_tts.py
@@ -1,9 +1,12 @@
 import asyncio
 import logging
 
+from dotenv import load_dotenv
 from livekit import rtc
 from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
-from livekit.plugins import openai
+from livekit.plugins import google
+
+load_dotenv()
 
 logger = logging.getLogger("openai-tts-demo")
 logger.setLevel(logging.INFO)
@@ -12,7 +15,7 @@
 async def entrypoint(job: JobContext):
     logger.info("starting tts example agent")
 
-    tts = openai.TTS(model="tts-1", voice="nova")
+    tts = google.TTS()
 
     source = rtc.AudioSource(tts.sample_rate, tts.num_channels)
     track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source)

diff --git a/examples/text-to-speech/sync_tts_transcription.py b/examples/text-to-speech/sync_tts_transcription.py
@@ -2,6 +2,7 @@
 import logging
 from typing import Optional
 
+from dotenv import load_dotenv
 from livekit import rtc
 from livekit.agents import (
     AutoSubscribe,
@@ -13,6 +14,8 @@
 )
 from livekit.plugins import elevenlabs
 
+load_dotenv()
+
 logger = logging.getLogger("transcription-forwarding-demo")
 logger.setLevel(logging.INFO)
 

diff --git a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/tts.py b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/tts.py
@@ -148,6 +148,7 @@ async def _main_task(self) -> None:
                     )
                 )
         else:
+            data = data[44:]  # skip WAV header
             self._event_ch.send_nowait(
                 tts.SynthesizedAudio(
                     request_id=request_id,