-
-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add XTTSv2 (coqui_tts extension) (#4673)
--------- Co-authored-by: oobabooga <[email protected]>
- Loading branch information
1 parent
ff24648
commit 8dc9ec3
Showing
8 changed files
with
964 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"Arabic": "ar", | ||
"Chinese": "zh-cn", | ||
"Czech": "cs", | ||
"Dutch": "nl", | ||
"English": "en", | ||
"French": "fr", | ||
"German": "de", | ||
"Hungarian": "hu", | ||
"Italian": "it", | ||
"Japanese": "ja", | ||
"Korean": "ko", | ||
"Polish": "pl", | ||
"Portuguese": "pt", | ||
"Russian": "ru", | ||
"Spanish": "es", | ||
"Turkish": "tr" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
TTS==0.20.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
import html | ||
import json | ||
import random | ||
import time | ||
from pathlib import Path | ||
|
||
import gradio as gr | ||
|
||
from modules import chat, shared, ui_chat | ||
from modules.logging_colors import logger | ||
from modules.ui import create_refresh_button | ||
from modules.utils import gradio | ||
|
||
try: | ||
from TTS.api import TTS | ||
except ModuleNotFoundError: | ||
logger.error( | ||
"Could not find the TTS module. Make sure to install the requirements for the coqui_tts extension." | ||
"\n" | ||
"\nLinux / Mac:\npip install -r extensions/coqui_tts/requirements.txt\n" | ||
"\nWindows:\npip install -r extensions\\coqui_tts\\requirements.txt\n" | ||
"\n" | ||
"If you used the one-click installer, paste the command above in the terminal window launched after running the \"cmd_\" script. On Windows, that's \"cmd_windows.bat\"." | ||
) | ||
|
||
raise | ||
|
||
|
||
# Extension settings. The widgets created in ui() are initialised from these
# values and their change handlers write the new values back here.
params = dict(
    activate=True,
    autoplay=True,
    show_text=False,
    voice="female_01.wav",
    language="English",
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    device="cuda",
)

# Absolute path of the directory containing this module, as a string.
this_dir = str(Path(__file__).parent.resolve())

# TTS model instance; stays None until setup() assigns it.
model = None

# Mapping of language display name -> language code, read from languages.json.
with open(Path(f"{this_dir}/languages.json"), encoding='utf8') as f:
    languages = json.loads(f.read())
|
||
|
||
def get_available_voices():
    """Return the sorted file names of every ``*.wav`` in the ``voices`` directory."""
    voices_dir = Path(f"{this_dir}/voices")
    names = [wav.name for wav in voices_dir.glob("*.wav")]
    names.sort()
    return names
|
||
|
||
def preprocess(raw_input):
    """Return *raw_input* with HTML character references decoded."""
    return html.unescape(raw_input)
|
||
|
||
def load_model():
    """Build the TTS model named by ``params["model_name"]``.

    The freshly constructed model is passed through ``.to()`` with
    ``params["device"]`` and the result of that call is returned.
    """
    tts = TTS(params["model_name"])
    return tts.to(params["device"])
|
||
|
||
def remove_tts_from_history(history):
    """Replace every visible reply with its internal (plain text) counterpart.

    The ``history`` dict is modified in place and also returned. The first
    element of each visible row is kept as is; only the reply is swapped.
    """
    for idx, internal_pair in enumerate(history['internal']):
        visible_rows = history['visible']
        visible_rows[idx] = [visible_rows[idx][0], internal_pair[1]]

    return history
|
||
|
||
def toggle_text_in_history(history):
    """Show or hide the message text under each audio player in the visible history.

    Only visible replies that start with ``<audio`` are touched. Each one is
    cut back to its audio tag; when ``params['show_text']`` is set, the
    matching internal reply is appended after a blank line. The ``history``
    dict is modified in place and returned.
    """
    for idx, row in enumerate(history['visible']):
        shown = row[1]
        if not shown.startswith('<audio'):
            continue

        # Everything up to the first closing tag is the player itself.
        player = f"{shown.split('</audio>')[0]}</audio>"
        if params['show_text']:
            player = f"{player}\n\n{history['internal'][idx][1]}"

        history['visible'][idx] = [history['visible'][idx][0], player]

    return history
|
||
|
||
def random_sentence():
    """Return one randomly chosen sentence from ``harvard_sentences.txt``.

    The file is resolved next to this module (via ``this_dir``), like the
    other resources this module reads, so the lookup no longer depends on
    the current working directory. It is read as UTF-8 instead of the
    platform default encoding. Blank lines are ignored and the sentence is
    returned without its trailing newline.

    Raises:
        IndexError: if the file contains no non-blank line.
    """
    sentences_path = Path(this_dir) / "harvard_sentences.txt"
    with open(sentences_path, encoding='utf8') as f:
        sentences = [line.strip() for line in f if line.strip()]

    return random.choice(sentences)
|
||
|
||
def voice_preview(string):
    """Synthesize a preview clip and return an autoplaying ``<audio>`` tag for it.

    *string* is HTML-unescaped first; if the result is empty, a sentence from
    ``random_sentence()`` is spoken instead. The clip is written to
    ``voice_preview.wav`` in the extension's outputs folder using the voice
    and language currently selected in ``params``.
    """
    text = html.unescape(string) or random_sentence()
    preview_path = Path('extensions/coqui_tts/outputs/voice_preview.wav')

    speaker = f"{this_dir}/voices/{params['voice']}"
    language_code = languages[params["language"]]
    model.tts_to_file(
        text=text,
        file_path=preview_path,
        speaker_wav=[speaker],
        language=language_code
    )

    # The timestamp in the query string makes each returned URL distinct even
    # though the file name never changes.
    stamp = int(time.time())
    return f'<audio src="file/{preview_path.as_posix()}?{stamp}" controls autoplay></audio>'
|
||
|
||
def history_modifier(history):
    """Strip the ``autoplay`` attribute from the last visible reply.

    Nothing is changed when the internal history is empty. The ``history``
    dict is modified in place and returned.
    """
    if not len(history['internal']) > 0:
        return history

    last_row = history['visible'][-1]
    without_autoplay = last_row[1].replace('controls autoplay>', 'controls>')
    history['visible'][-1] = [last_row[0], without_autoplay]
    return history
|
||
|
||
def state_modifier(state):
    """Set ``state['stream']`` to False while the extension is activated.

    When ``params['activate']`` is off, *state* is returned untouched.
    """
    if params['activate']:
        state['stream'] = False

    return state
|
||
|
||
def input_modifier(string, state):
    """Return *string* unchanged, updating the processing message when active.

    While ``params['activate']`` is on, ``shared.processing_message`` is set
    to the "recording a voice message" text. *state* is not used.
    """
    if params['activate']:
        shared.processing_message = "*Is recording a voice message...*"

    return string
|
||
|
||
def output_modifier(string, state):
    """Turn a reply into an ``<audio>`` player backed by a freshly synthesized wav.

    When the extension is deactivated, *string* is returned unchanged.
    Otherwise the reply is run through ``preprocess()`` and spoken with the
    voice and language selected in ``params``. The wav is written to the
    extension's outputs folder, named after ``state["character_menu"]`` and
    the current Unix time in whole seconds. The returned HTML contains the
    player and, if ``params['show_text']`` is set, the original reply text
    below it. An empty reply yields a placeholder message instead of audio.

    ``shared.processing_message`` is reset to the "typing" text before
    returning from the active path.
    """
    if not params['activate']:
        return string

    original_string = string
    # preprocess() already decodes HTML entities. Decoding a second time here
    # would unescape one level too far (a literal "&amp;lt;" would become "<").
    string = preprocess(string)
    if string == '':
        string = '*Empty reply, try regenerating*'
    else:
        output_file = Path(f'extensions/coqui_tts/outputs/{state["character_menu"]}_{int(time.time())}.wav')
        model.tts_to_file(
            text=string,
            file_path=output_file,
            speaker_wav=[f"{this_dir}/voices/{params['voice']}"],
            language=languages[params["language"]]
        )

        autoplay = 'autoplay' if params['autoplay'] else ''
        string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
        if params['show_text']:
            string += f'\n\n{original_string}'

    shared.processing_message = "*Is typing...*"
    return string
|
||
|
||
def custom_css():
    """Return the text of ``style.css`` located next to this module."""
    path_to_css = Path(f"{this_dir}/style.css")
    # Use a context manager so the file handle is closed as soon as the
    # contents are read instead of being left for the garbage collector.
    with open(path_to_css, 'r') as css_file:
        return css_file.read()
|
||
|
||
def setup():
    """Load the TTS model into the module-level ``model`` and create the outputs folder."""
    global model

    print("[XTTS] Loading XTTS...")
    model = load_model()
    print("[XTTS] Done!")

    # Generated wav files are written here; make sure the folder exists.
    outputs_dir = Path(f"{this_dir}/outputs")
    outputs_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
def ui():
    """Create the extension's Gradio widgets and connect their event handlers.

    Widget values are initialised from ``params``; the change handlers write
    the new values back into ``params``.
    """

    def _store_in_params(key):
        # Build a one-argument callback that records the widget value under *key*.
        return lambda value: params.update({key: value})

    def _ask_for_confirmation():
        # Visibility updates for [confirm, convert, cancel]:
        # reveal confirm and cancel, hide the convert button.
        return [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)]

    def _leave_confirmation():
        # Visibility updates for [confirm, convert, cancel]:
        # hide confirm and cancel, bring the convert button back.
        return [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)]

    # NOTE(review): the container nesting below follows the blank-line
    # grouping of the statements; confirm the Voice/Language row layout.
    with gr.Accordion("Coqui TTS (XTTSv2)"):
        with gr.Row():
            activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
            autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')

        with gr.Row():
            show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')

        with gr.Row():
            with gr.Row():
                voice = gr.Dropdown(get_available_voices(), label="Voice wav", value=params["voice"])
                create_refresh_button(voice, lambda: None, lambda: {'choices': get_available_voices(), 'value': params["voice"]}, 'refresh-button')

            language = gr.Dropdown(languages.keys(), label="Language", value=params["language"])

        with gr.Row():
            preview_text = gr.Text(show_label=False, placeholder="Preview text", elem_id="silero_preview_text")
            preview_play = gr.Button("Preview")
            preview_audio = gr.HTML(visible=False)

        with gr.Row():
            convert = gr.Button('Permanently replace audios with the message texts')
            convert_cancel = gr.Button('Cancel', visible=False)
            convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)

    # Convert history with confirmation
    confirm_widgets = [convert_confirm, convert, convert_cancel]
    convert.click(_ask_for_confirmation, None, confirm_widgets)

    confirmed = convert_confirm.click(_leave_confirmation, None, confirm_widgets)
    confirmed = confirmed.then(remove_tts_from_history, gradio('history'), gradio('history'))
    confirmed = confirmed.then(chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
    confirmed.then(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))

    convert_cancel.click(_leave_confirmation, None, confirm_widgets)

    # Toggle message text in history
    toggled = show_text.change(_store_in_params("show_text"), show_text, None)
    toggled = toggled.then(toggle_text_in_history, gradio('history'), gradio('history'))
    toggled = toggled.then(chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
    toggled.then(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))

    # Event functions to update the parameters in the backend
    activate.change(_store_in_params("activate"), activate, None)
    autoplay.change(_store_in_params("autoplay"), autoplay, None)
    voice.change(_store_in_params("voice"), voice, None)
    language.change(_store_in_params("language"), language, None)

    # Play preview
    preview_text.submit(voice_preview, preview_text, preview_audio)
    preview_play.click(voice_preview, preview_text, preview_audio)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/* Fixed 6em width for number inputs inside .SDAP .hires_opts */
.SDAP .hires_opts input[type="number"] {
    width: 6em !important;
}

/* Preview text box: the form that directly wraps #silero_preview_text
   takes at least three quarters of the row */
.form:has(> #silero_preview_text) {
    min-width: 75%;
}
Binary file not shown.
Binary file not shown.
Binary file not shown.