-
Notifications
You must be signed in to change notification settings - Fork 0
/
Talker.py
110 lines (83 loc) · 3.32 KB
/
Talker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
Talks the given message by using gTTS to convert text to speech.
Saves generated speech to mp3 files in a database folder.
Checks if a file already exists before generating a new one.
Plays the speech using playsound.
Can specify language, voice ID, whether to delete the file after playing,
and whether to use gTTS vs ElevenLabs.
"""
import time
import pyttsx3
from gtts import gTTS
from playsound import playsound
import os
from elevenlabs import generate, save, Voice
import hashlib
"""
Function that creates an unsalted SHA-256 hash, used for file names.
"""
def create_hash(input_string):
    """Return the unsalted SHA-256 hex digest of *input_string*.

    The text is encoded as UTF-8 before hashing; the result is a lowercase
    hexadecimal string.
    """
    encoded = input_string.encode('utf-8')
    return hashlib.sha256(encoded).hexdigest()
# Shared pyttsx3 engine, created once when the module is imported.
# Talker.talk() uses it as the fallback voice when the mp3 path fails.
engine = pyttsx3.init()
class Talker:
    """Speak text aloud through gTTS or ElevenLabs, caching the audio as mp3.

    Generated speech is stored under ``database/`` and replayed on later calls
    that use the same message and voice. If anything in that pipeline fails
    (no cached file, no internet connection, service error), the module-level
    pyttsx3 ``engine`` speaks the message instead.

    Parameters:
    - voice: if gTTS is used this is the voice language (ex: ja), otherwise,
      if ElevenLabs is used, it is the voice id.
    - api: selects the speech service. If it is equal to "" gTTS is used;
      any other value is passed to ElevenLabs as the API key.
    - temp: if False the files are kept in the database folder, otherwise
      each file is removed after it has been played.
    """

    def __init__(self, voice: str = "en", api: str = "", temp: bool = False) -> None:
        self.voice = voice
        self.api = api
        self.temp = temp

    def _cache_path(self, message: str) -> str:
        """Return the mp3 path that caches *message* for the current voice."""
        # A SHA-256 hex digest is always 64 characters of 0-9a-f, so taking
        # the first 7 upper-cased characters yields the same names as before
        # and previously cached files remain valid.
        digest = create_hash(message).upper()[:7]
        # NOTE(review): only the first two characters of the voice are used,
        # so two ElevenLabs voice ids sharing a prefix map to the same file.
        voice_tag = str(self.voice).upper()[:2]
        return "database/" + digest + "_" + voice_tag + ".mp3"

    def _synthesize(self, message: str, filename: str) -> None:
        """Generate speech for *message* and write it to *filename*."""
        # Without the folder every save fails and every call would silently
        # end up in the pyttsx3 fallback.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        if self.api == "":
            # Google
            gTTS(message, lang=str(self.voice)).save(filename)
        else:
            # Elevenlabs
            audio = generate(
                api_key=self.api,
                text=message,
                voice=Voice(voice_id=self.voice),
                model="eleven_multilingual_v2",
            )
            save(audio, filename)

    def talk(self, message) -> None:
        """Print *message* and speak it.

        The cached mp3 is played when it exists; otherwise the audio is
        generated (gTTS when ``api`` is "", ElevenLabs otherwise), saved and
        then played. When ``temp`` is true the file is removed after playing.
        Any error falls back to pyttsx3, which works offline.

        - message: the text to speak; it is converted with ``str()``.
        """
        text = str(message)
        try:
            filename = self._cache_path(text)
            print(message)
            try:
                # Try to play sound from database
                playsound(filename)
            except Exception:
                # No usable cached file: generate it, then play it.
                # ``Exception`` (not a bare except) lets Ctrl-C stop the
                # program instead of triggering a regeneration.
                self._synthesize(text, filename)
                # Short pause kept from the original; presumably gives the
                # file time to be fully written before playback.
                time.sleep(0.1)
                playsound(filename)
            if self.temp:
                os.remove(filename)
        except Exception:
            # failsafe
            engine.say(text)
            engine.runAndWait()