Skip to content

Commit

Permalink
google-tts: ignore wav header (#703)
Browse files Browse the repository at this point in the history
  • Loading branch information
theomonnom authored Sep 4, 2024
1 parent 38999bd commit 3fd86b3
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .changeset/early-guests-join.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-plugins-google": patch
---

google-tts: ignore wav header
3 changes: 3 additions & 0 deletions examples/text-to-speech/elevenlabs_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
import logging
from typing import Optional

from dotenv import load_dotenv
from livekit import rtc
from livekit.agents import JobContext, WorkerOptions, cli
from livekit.plugins import elevenlabs

logger = logging.getLogger("elevenlabs-tts-demo")
logger.setLevel(logging.INFO)

load_dotenv()


def _text_to_chunks(text: str) -> list[str]:
"""Split the text into chunks of 2, 3, and 4 words"""
Expand Down
7 changes: 5 additions & 2 deletions examples/text-to-speech/openai_tts.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import asyncio
import logging

from dotenv import load_dotenv
from livekit import rtc
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
- from livekit.plugins import openai
+ from livekit.plugins import google

load_dotenv()

logger = logging.getLogger("openai-tts-demo")
logger.setLevel(logging.INFO)
Expand All @@ -12,7 +15,7 @@
async def entrypoint(job: JobContext):
logger.info("starting tts example agent")

- tts = openai.TTS(model="tts-1", voice="nova")
+ tts = google.TTS()

source = rtc.AudioSource(tts.sample_rate, tts.num_channels)
track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source)
Expand Down
3 changes: 3 additions & 0 deletions examples/text-to-speech/sync_tts_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
from typing import Optional

from dotenv import load_dotenv
from livekit import rtc
from livekit.agents import (
AutoSubscribe,
Expand All @@ -13,6 +14,8 @@
)
from livekit.plugins import elevenlabs

load_dotenv()

logger = logging.getLogger("transcription-forwarding-demo")
logger.setLevel(logging.INFO)

Expand Down
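1 change: 1 addition & 0 deletions [file path missing from this capture; presumably the livekit-plugins-google TTS module]
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ async def _main_task(self) -> None:
)
)
else:
+ data = data[44:] # skip WAV header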
self._event_ch.send_nowait(
tts.SynthesizedAudio(
request_id=request_id,
Expand Down

0 comments on commit 3fd86b3

Please sign in to comment.