Skip to content

Commit

Permalink
cartesia: remove deprecated model & use new API (livekit#425)
Browse files Browse the repository at this point in the history
  • Loading branch information
theomonnom authored Jul 8, 2024
1 parent d098b9f commit 382909c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,6 @@
]


TTSModels = Literal["upbeat-moon"]


# fmt: off
# Barbershop Man in upbeat-moon
TTSDefaultVoiceEmbedding: list[float] = [
-0.033633083, 0.072083704, -0.01807767, -0.083488315, -0.04407617, 0.0022592682, 0.070505895,
0.023946615, -0.04788024, -0.06388413, -0.0716355, -0.0022612812, -0.0053448505, -0.07848381,
0.0348162, -0.053745482, -0.092399485, -0.02950225, 0.028591828, -0.10556894, 0.023313355,
0.06224387, 0.0362463, 0.029258432, 0.10769641, 0.043595582, -0.058543224, -0.080402784,
-0.0953816, -0.008988032, -0.0028981369, -0.004752721, -0.20742874, 0.058907595, 0.08813939,
-0.06192675, 0.099082634, -0.09661578, -0.0077761724, -0.013982456, -0.025798267, 0.04467142,
0.026222011, 0.023023574, 0.011227064, -0.17462021, -0.09880612, -0.1521035, -0.060464993,
-0.04735665, -0.09725187, -0.006127679, 0.15818526, -0.039493002, -0.067719474, 0.0066190436,
-0.10636633, 0.17073768, -0.051717706, 0.03186961, -0.020547207, -0.02244247, 0.013196935,
-0.06431055, -0.115360335, 0.016918058, -0.033195216, 0.11255181, 0.020366343, -0.041032124,
0.08780918, -0.040567942, 0.057276532, 0.05848221, -0.077479474, -0.073524915, -0.01913317,
-0.029291833, 0.11210393, -0.09859328, 0.2152541, -0.022976823, 0.028627992, -0.039598297,
0.041829932, -0.05593181, -0.06444655, -0.018057477, -0.008098263, 0.05994528, 0.10430693,
-0.13121894, -0.06512868, -0.026126215, 0.046727825, -0.17180993, -0.10577226, -0.08610466,
0.008862588, 0.09547498, -0.010965332, -0.061217085, -0.038954042, 0.019930292, -0.017192135,
0.007296275, 0.03273872, 0.04389937, -0.056483064, 0.003420891, -0.10319067, -0.015706042,
0.1308774, -0.0018035866, -0.03582506, 0.077131025, 0.013398928, 0.003188886, 0.12039741,
-0.033974767, 0.06899378, -0.059775922, -0.026934423, 0.028482193, 0.100996524, 0.004498743,
-0.02291186, 0.078752205, -0.0063796206, 0.04206536, 0.05721349, 0.06290694, 0.06130212,
0.096969016, -0.057664312, -0.16727506, -0.035220966, 0.090760484, 0.010039947, 0.06513242,
0.011055657, -0.004258431, -0.08316792, -0.15650468, -0.076931365, 0.11385587, -0.038372636,
0.015648656, -0.12029895, -0.06604956, 0.009441591, -0.11912808, 0.013378132, 0.029525978,
-0.0056742397, -0.0075976513, 0.019999338, -0.05521377, -0.07650746, -0.017710293, -0.033986397,
-0.047768556, 0.13857274, 0.099290825, 0.11736938, 0.017834296, -0.07140237, -0.052047748,
-0.06398965, -0.037033975, -0.061061256, -0.03330076, -0.024472248, -0.059656, 0.05359946,
-0.043915518, -0.086325996, 0.14189173, 0.021086395, 0.02945159, 0.1029604, 0.018490415,
-0.028736332, -0.025272416, -0.06082937, -0.031339463, -0.0007249595, 0.025595888, 0.007144545,
-0.16938712, -0.1160664, -0.0654145,
]
# fmt: on
# Model names accepted for the `model` argument of the TTS constructor
# (default "sonic-english"); these replace the deprecated "upbeat-moon" model.
TTSModels = Literal["sonic-english", "sonic-multilingual"]
# Presumably the language codes the Cartesia API accepts -- TODO confirm against
# the provider docs. NOTE(review): the TTS constructor types `language` as a
# plain `str` and does not import this alias, so it is not enforced there.
TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
# Default value for the `voice` argument of the TTS constructor: a voice ID
# string, used in place of the former inline default voice embedding.
TTSDefaultVoiceId = "248be419-c632-4f23-adf1-5324ed7dbf1d"
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from livekit.agents import tts, utils

from .log import logger
from .models import TTSDefaultVoiceEmbedding, TTSEncoding, TTSModels
from .models import TTSDefaultVoiceId, TTSEncoding, TTSModels

API_AUTH_HEADER = "X-API-Key"
API_VERSION_HEADER = "Cartesia-Version"
Expand All @@ -39,15 +39,17 @@ class _TTSOptions:
sample_rate: int
voice: str | list[float]
api_key: str
language: str


class TTS(tts.TTS):
def __init__(
self,
*,
model: TTSModels = "upbeat-moon",
model: TTSModels = "sonic-english",
language: str = "en",
encoding: TTSEncoding = "pcm_s16le",
voice: str | list[float] = TTSDefaultVoiceEmbedding,
voice: str | list[float] = TTSDefaultVoiceId,
sample_rate: int = 24000,
api_key: str | None = None,
http_session: aiohttp.ClientSession | None = None,
Expand All @@ -64,6 +66,7 @@ def __init__(

self._opts = _TTSOptions(
model=model,
language=language,
encoding=encoding,
sample_rate=sample_rate,
voice=voice,
Expand Down Expand Up @@ -120,6 +123,7 @@ async def _run(self):
"encoding": self._opts.encoding,
"sample_rate": self._opts.sample_rate,
},
"language": self._opts.language,
},
) as resp:
bytes_per_frame = (self._opts.sample_rate // 100) * 2
Expand Down

0 comments on commit 382909c

Please sign in to comment.