Skip to content

Commit

Permalink
TTS config: Add a button to download/delete voices for the piper backend
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Sep 7, 2024
1 parent ef10410 commit 5657351
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 9 deletions.
88 changes: 82 additions & 6 deletions src/calibre/gui2/tts/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>


from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QMediaDevices, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
from qt.core import (
QCheckBox,
QFormLayout,
QHBoxLayout,
QIcon,
QLabel,
QLocale,
QMediaDevices,
QPushButton,
QSize,
QSlider,
Qt,
QTreeWidget,
QTreeWidgetItem,
QVBoxLayout,
QWidget,
pyqtSignal,
)

from calibre.gui2.tts.types import (
AudioDeviceId,
Expand Down Expand Up @@ -113,10 +129,13 @@ def val(self, v):

class Voices(QTreeWidget):

voice_changed = pyqtSignal()

def __init__(self, parent=None):
    '''Set up the voice tree: hidden header, a placeholder default voice and
    forwarding of selection changes to voice_changed.'''
    super().__init__(parent)
    # A Voice() constructed without arguments is kept as the system default voice
    self.system_default_voice = Voice()
    self.setHeaderHidden(True)
    # Every change of the current item is forwarded to the voice_changed signal
    self.currentItemChanged.connect(self.voice_changed)

def sizeHint(self) -> QSize:
    '''Return the size hint for this widget: 400 wide by 500 high.'''
    hint_width, hint_height = 400, 500
    return QSize(hint_width, hint_height)
Expand Down Expand Up @@ -154,14 +173,23 @@ def lang(langcode):

@property
def val(self) -> str:
    '''Name of the currently selected voice.

    Returns an empty string when current_voice is falsy, for example when
    the tree has no current item.
    '''
    # Go through current_voice rather than calling currentItem().data()
    # directly: currentItem() may be None, and current_voice handles that.
    voice = self.current_voice
    return voice.name if voice else ''

@property
def current_voice(self) -> Voice | None:
    '''The data stored under UserRole in column 0 of the current item, or
    None when the tree has no current item.'''
    item = self.currentItem()
    if item is None:
        return None
    return item.data(0, Qt.ItemDataRole.UserRole)


class EngineSpecificConfig(QWidget):

voice_changed = pyqtSignal()

def __init__(self, parent):
super().__init__(parent)
self.engine_name = ''
self.l = l = QFormLayout(self)
devs = QMediaDevices.audioOutputs()
dad = QMediaDevices.defaultAudioOutput()
Expand All @@ -183,6 +211,7 @@ def __init__(self, parent):
self.audio_device = ad = QComboBox(self)
l.addRow(_('Output a&udio to:'), ad)
self.voices = v = Voices(self)
v.voice_changed.connect(self.voice_changed)
la = QLabel(_('V&oices:'))
la.setBuddy(v)
l.addRow(la)
Expand Down Expand Up @@ -241,6 +270,7 @@ def set_engine(self, engine_name):
else:
self.layout().setRowVisible(self.audio_device, False)
self.rebuild_voices()
return metadata

def rebuild_voices(self):
try:
Expand Down Expand Up @@ -269,6 +299,29 @@ def as_settings(self) -> EngineSpecificSettings:
break
return ans

def voice_action(self):
    '''Toggle the downloaded state of the currently selected voice.

    If the backend reports the voice as downloaded it is deleted, otherwise
    it is downloaded. Nothing happens when no voice is selected or when the
    current engine's metadata has has_managed_voices unset.
    '''
    voice = self.voices.current_voice
    if voice is None or not available_engines()[self.engine_name].has_managed_voices:
        return
    backend = create_tts_backend(self.engine_name)
    # Choose the operation only after querying the current state
    operation = backend.delete_voice if backend.is_voice_downloaded(voice) else backend.download_voice
    operation(voice)

def current_voice_is_downloaded(self) -> bool:
    '''Report whether the selected voice is already downloaded.

    Returns False when no voice is selected or when the current engine's
    metadata has has_managed_voices unset; otherwise returns whatever the
    engine backend's is_voice_downloaded() reports for the voice.
    '''
    voice = self.voices.current_voice
    if voice is not None and available_engines()[self.engine_name].has_managed_voices:
        return create_tts_backend(self.engine_name).is_voice_downloaded(voice)
    return False


class ConfigDialog(Dialog):

Expand All @@ -279,12 +332,35 @@ def setup_ui(self):
self.l = l = QVBoxLayout(self)
self.engine_choice = ec = EngineChoice(self)
self.engine_specific_config = esc = EngineSpecificConfig(self)
ec.changed.connect(esc.set_engine)
ec.changed.connect(self.set_engine)
esc.voice_changed.connect(self.update_voice_button)
l.addWidget(ec)
l.addWidget(esc)
l.addWidget(self.bb)
self.voice_button = b = QPushButton(self)
b.clicked.connect(self.voice_action)
h = QHBoxLayout()
l.addLayout(h)
h.addWidget(b), h.addStretch(10), h.addWidget(self.bb)
self.initial_engine_choice = ec.value
esc.set_engine(self.initial_engine_choice)
self.set_engine(self.initial_engine_choice)

def set_engine(self, engine_name: str) -> None:
    '''Switch the engine specific configuration to engine_name and bring the
    voice button in line with it.'''
    engine_metadata = self.engine_specific_config.set_engine(engine_name)
    # The voice button is only shown for engines that have managed voices
    self.voice_button.setVisible(engine_metadata.has_managed_voices)
    self.update_voice_button()

def update_voice_button(self):
    '''Make the voice button's icon and text reflect whether the selected
    voice is downloaded (offer removal) or not (offer download).'''
    button = self.voice_button
    downloaded = self.engine_specific_config.current_voice_is_downloaded()
    button.setIcon(QIcon.ic('trash.png' if downloaded else 'download-metadata.png'))
    button.setText(_('Remove downloaded voice') if downloaded else _('Download voice'))

def voice_action(self):
    '''Run the engine specific voice action, then refresh the voice button
    so it matches the resulting state.'''
    config = self.engine_specific_config
    config.voice_action()
    self.update_voice_button()

@property
def engine_changed(self) -> bool:
Expand Down
33 changes: 31 additions & 2 deletions src/calibre/gui2/tts/piper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import sys
from collections import deque
from contextlib import suppress
from dataclasses import dataclass
from itertools import count
from time import monotonic
Expand Down Expand Up @@ -441,12 +442,32 @@ def _default_voice(self) -> Voice:
lang = canonicalize_lang(lang) or lang
return self._voice_for_lang.get(lang) or self._voice_for_lang['eng']

def _paths_for_voice(self, voice: Voice) -> tuple[str, str]:
    '''Return (model_path, config_path) for voice.

    The model file is named by voice.engine_data['model_filename'] and lives
    in the 'piper-voices' directory under cache_dir(). The config file sits
    in the same directory as the model and has '.json' appended to the
    model filename.
    '''
    fname = voice.engine_data['model_filename']
    model_path = os.path.join(cache_dir(), 'piper-voices', fname)
    config_path = os.path.join(os.path.dirname(model_path), fname + '.json')
    return model_path, config_path

def is_voice_downloaded(self, v: Voice) -> bool:
    '''Return True only if every path returned by _paths_for_voice() exists.

    A voice with an empty name is replaced by self._default_voice first.
    '''
    voice = v if v.name else self._default_voice
    return all(os.path.exists(path) for path in self._paths_for_voice(voice))

def delete_voice(self, v: Voice) -> None:
    '''Remove the files returned by _paths_for_voice() for the voice.

    A voice with an empty name is replaced by self._default_voice first.
    Files that are already missing are skipped silently.
    '''
    voice = v if v.name else self._default_voice
    for stale_path in self._paths_for_voice(voice):
        # Only a missing file is tolerated; other OS errors still propagate
        with suppress(FileNotFoundError):
            os.remove(stale_path)

def _download_voice(self, voice: Voice, download_even_if_exists: bool = False) -> tuple[str, str]:
model_path, config_path = self._paths_for_voice(voice)
if os.path.exists(model_path) and os.path.exists(config_path):
return model_path, config_path
if not download_even_if_exists:
return model_path, config_path
os.makedirs(os.path.dirname(model_path), exist_ok=True)
from calibre.gui2.tts.download import DownloadResources
d = DownloadResources(_('Downloading voice for Read aloud'), _('Downloading neural network for the {} voice').format(voice.human_name), {
Expand All @@ -457,6 +478,14 @@ def _ensure_voice_is_downloaded(self, voice: Voice) -> tuple[str, str]:
return model_path, config_path
return '', ''

def download_voice(self, v: Voice) -> None:
    '''Download the files for the voice via _download_voice().

    A voice with an empty name is replaced by self._default_voice first.
    download_even_if_exists=True is passed through; the paths returned by
    _download_voice() are discarded.
    '''
    voice = v if v.name else self._default_voice
    self._download_voice(voice, download_even_if_exists=True)

def _ensure_voice_is_downloaded(self, voice: Voice) -> tuple[str, str]:
    '''Delegate to _download_voice() with its default arguments and return
    the pair of paths it produces.'''
    paths = self._download_voice(voice)
    return paths

def validate_settings(self, s: EngineSpecificSettings, parent: QWidget | None) -> bool:
self._load_voice_metadata()
voice = self._voice_name_map.get(s.voice_name) or self._default_voice
Expand Down
12 changes: 11 additions & 1 deletion src/calibre/gui2/tts/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class EngineMetadata(NamedTuple):
can_change_pitch: bool = True
can_change_volume: bool = True
voices_have_quality_metadata: bool = False
has_managed_voices: bool = False


class Quality(Enum):
Expand Down Expand Up @@ -218,7 +219,7 @@ def qt_engine_metadata(name: str, human_name: str, desc: str, allows_choosing_au
ans['piper'] = EngineMetadata('piper', _('The Piper Neural Speech Engine'), _(
'The "piper" engine can track the currently spoken sentence on screen. It uses a neural network '
'for natural sounding voices. The neural network is run locally on your computer, it is fairly resource intensive to run.'
), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True)
), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True, has_managed_voices=True)
if islinux:
from speechd.paths import SPD_SPAWN_CMD
cmd = os.getenv("SPEECHD_CMD", SPD_SPAWN_CMD)
Expand Down Expand Up @@ -281,6 +282,15 @@ def reload_after_configure(self) -> None:
def validate_settings(self, s: EngineSpecificSettings, parent: QWidget | None) -> bool:
    '''Default implementation: accept any settings, always returning True.'''
    return True

def is_voice_downloaded(self, v: Voice) -> bool:
    '''Default implementation: report every voice as downloaded.'''
    return True

def delete_voice(self, v: Voice) -> None:
    '''Default implementation: do nothing.'''
    return None

def download_voice(self, v: Voice) -> None:
    '''Default implementation: do nothing.'''
    return None


engine_instances: dict[str, TTSBackend] = {}

Expand Down

0 comments on commit 5657351

Please sign in to comment.