Fix/sync fork neil/1 #172

Closed · wants to merge 6 commits
Changes from all commits
@@ -29,7 +29,7 @@

 @dataclass
 class Voice:
-    id: str
+    voice_id: str
     name: str
     category: str
     settings: Optional["VoiceSettings"] = None
@@ -44,7 +44,7 @@ class VoiceSettings:


 DEFAULT_VOICE = Voice(
-    id="EXAVITQu4vr4xnSDxMaL",
+    voice_id="EXAVITQu4vr4xnSDxMaL",
     name="Bella",
     category="premade",
     settings=VoiceSettings(
@@ -108,7 +108,7 @@ async def synthesize(
     ) -> tts.SynthesizedAudio:
         voice = self._config.voice
         async with self._session.post(
-            f"{self._config.base_url}/text-to-speech/{voice.id}?output_format=pcm_44100",
+            f"{self._config.base_url}/text-to-speech/{voice.voice_id}?output_format=mp3_44100_128",
             headers={AUTHORIZATION_HEADER: self._config.api_key},
             json=dict(
                 text=text,
@@ -159,9 +159,9 @@ def log_exception(task: asyncio.Task) -> None:

     def _stream_url(self) -> str:
         base_url = self._config.base_url
-        voice_id = self._config.voice.id
+        voice_id = self._config.voice.voice_id
         model_id = self._config.model_id
-        return f"{base_url}/text-to-speech/{voice_id}/stream-input?model_id={model_id}&output_format=pcm_{self._config.sample_rate}&optimize_streaming_latency={self._config.latency}"
+        return f"{base_url}/text-to-speech/{voice_id}/stream-input?model_id={model_id}&output_format=mp3_44100_128&optimize_streaming_latency={self._config.latency}"

     def push_text(self, token: str) -> None:
         if self._closed:
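For context on the change above: with placeholder config values assumed for illustration (base_url, model_id, and latency are not taken from this diff; only the voice_id comes from DEFAULT_VOICE), the new _stream_url would produce a URL along these lines. A minimal sketch, not part of the PR:

# Sketch only: placeholder values assumed for illustration.
base_url = "https://api.elevenlabs.io/v1"   # assumed API base
voice_id = "EXAVITQu4vr4xnSDxMaL"           # DEFAULT_VOICE above
model_id = "eleven_monolingual_v1"          # assumed model_id
latency = 2                                 # assumed optimize_streaming_latency
url = (
    f"{base_url}/text-to-speech/{voice_id}/stream-input"
    f"?model_id={model_id}&output_format=mp3_44100_128"
    f"&optimize_streaming_latency={latency}"
)
# -> https://api.elevenlabs.io/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL/stream-input
#    ?model_id=eleven_monolingual_v1&output_format=mp3_44100_128&optimize_streaming_latency=2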
@@ -184,6 +184,7 @@ async def _run(self, max_retry: int) -> None:
         retry_count = 0
         listen_task: Optional[asyncio.Task] = None
         ws: Optional[aiohttp.ClientWebSocketResponse] = None
+        retry_text_queue: asyncio.Queue[str] = asyncio.Queue()
         while True:
             try:
                 ws = await self._try_connect()
@@ -194,7 +195,13 @@
                 # forward queued text to 11labs
                 started = False
                 while not ws.closed:
-                    text = await self._queue.get()
+                    text = None
+                    if not retry_text_queue.empty():
+                        text = await retry_text_queue.get()
+                        retry_text_queue.task_done()
+                    else:
+                        text = await self._queue.get()
+
                     if not started:
                         self._event_queue.put_nowait(
                             tts.SynthesisEvent(type=tts.SynthesisEventType.STARTED)
@@ -204,7 +211,19 @@
                         text=text,
                         try_trigger_generation=True,
                     )
-                    await ws.send_str(json.dumps(text_packet))
+
+                    # This case can happen in normal operation because 11labs will not
+                    # keep connections open indefinitely if we are not sending data.
+                    try:
+                        await ws.send_str(json.dumps(text_packet))
+                    except Exception:
+                        await retry_text_queue.put(text)
+                        break
+
+                    # We call self._queue.task_done() even if we are retrying the text because
+                    # all text has gone through self._queue. An exception may have short-circuited
+                    # out of the loop so task_done() will not have already been called on text that
+                    # is being retried.
                     self._queue.task_done()
                     if text == STREAM_EOS:
                         await listen_task
@@ -307,10 +326,10 @@ def dict_to_voices_list(data: dict) -> List[Voice]:
     for voice in data["voices"]:
         voices.append(
             Voice(
-                id=voice["voice_id"],
+                voice_id=voice["voice_id"],
                 name=voice["name"],
                 category=voice["category"],
                 settings=None,
             )
         )
     return voices
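Taken together, the retry_text_queue changes implement a small reconnect-and-replay pattern: when a send fails (11labs closes idle connections), the text is parked in a local queue, the outer loop reconnects, and the parked text is drained before anything new is pulled from the main queue; task_done() on the main queue is only called once the text has actually been sent. Below is a minimal standalone sketch of the same idea, not the plugin's actual API: forward_with_retry, source, and connect are names invented here, and the empty string stands in for the STREAM_EOS sentinel.

import asyncio


async def forward_with_retry(source: "asyncio.Queue[str]", connect) -> None:
    # `connect` is a hypothetical coroutine returning a websocket-like object
    # with a `closed` attribute and an async `send_str(text)` method.
    retry_queue: asyncio.Queue[str] = asyncio.Queue()
    while True:
        ws = await connect()
        while not ws.closed:
            # Replay text that failed on the previous connection before pulling new text.
            if not retry_queue.empty():
                text = retry_queue.get_nowait()
            else:
                text = await source.get()
            try:
                await ws.send_str(text)
            except Exception:
                # Send failed (e.g. the server dropped an idle connection):
                # keep the text and break out to reconnect.
                await retry_queue.put(text)
                break
            # Every item originally came from `source`, so mark it done only
            # after a successful send; a failed send breaks out before this line.
            source.task_done()
            if text == "":  # stand-in for the STREAM_EOS sentinel
                return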