Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-enable trial service and remove legacy API #35

Merged
merged 5 commits into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/aspeak/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from .format import FileFormat, AudioFormat
from .functional import text_to_speech, pure_text_to_speech, ssml_to_speech
from .provider import SpeechServiceProvider
from .api import SpeechToFileService, SpeechToSpeakerService, SpeechServiceBase, SpeechToOneFileService
80 changes: 61 additions & 19 deletions src/aspeak/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

from .format import parse_format, AudioFormat, FileFormat
from ..ssml import create_ssml
from ..urls import ENDPOINT_URL
from ..urls import GET_TOKEN
from requests import get
from re import search
from time import time
import azure.cognitiveservices.speech as speechsdk


Expand All @@ -23,7 +26,8 @@ class SpeechServiceBase:
"""

def __init__(self, locale: Optional[str] = None, voice: Optional[str] = None,
audio_format: Union[AudioFormat, FileFormat, speechsdk.SpeechSynthesisOutputFormat, None] = None,
audio_format: Union[AudioFormat, FileFormat,
speechsdk.SpeechSynthesisOutputFormat, None] = None,
output: speechsdk.audio.AudioOutputConfig = None
):
"""
Expand All @@ -32,26 +36,53 @@ def __init__(self, locale: Optional[str] = None, voice: Optional[str] = None,
:param output: An instance of AudioOutputConfig.
:param audio_format: The audio format, optional.
"""
self._config = speechsdk.SpeechConfig(endpoint=ENDPOINT_URL)
self._locale = locale
self._voice = voice
self._audio_format = audio_format
self._output = output
if locale is not None:
self._config.speech_synthesis_language = locale
if voice is not None:
self._config.speech_synthesis_voice_name = voice
if audio_format is not None:
self._config.set_speech_synthesis_output_format(parse_format(audio_format))
self._synthesizer = speechsdk.SpeechSynthesizer(self._config, self._output)
self._config()

def _config(self):
    """
    Build the initial speech configuration and synthesizer.

    NOTE: a successful run stores the ``SpeechConfig`` object in the
    instance attribute ``self._config``, which shadows this method on the
    instance. ``self._config()`` therefore only works for the very first
    call (the one made by the constructor); every later refresh must go
    through ``_refresh_config`` instead.
    """
    self._refresh_config()

def _refresh_config(self):
    """
    Fetch a fresh token and rebuild the speech config and synthesizer.

    Downloads the page at ``GET_TOKEN``, extracts the ``token`` and
    ``region`` values embedded in it, records the fetch time in
    ``self._time`` and replaces ``self._config`` and ``self._synthesizer``
    with objects built from the new credentials and the stored locale,
    voice, audio format and output.

    :raises requests.HTTPError: If the page request returns an error status.
    :raises RuntimeError: If the token or region cannot be found in the page.
    """
    # Without a timeout a stalled connection would block the caller forever.
    response = get(GET_TOKEN, timeout=30)
    response.raise_for_status()
    html = response.text
    # [^"]+ stops at the first closing quote, so other quoted text on the
    # same line is never swallowed into the captured value.
    token = search(r'token: "([^"]+)"', html)
    region = search(r'region: "([^"]+)"', html)
    # Explicit check instead of assert: asserts are stripped under -O.
    if token is None or region is None:
        raise RuntimeError(
            "Could not extract the speech service token/region from "
            + GET_TOKEN)
    self._time = time()
    self._config = speechsdk.SpeechConfig(
        region=region.group(1), auth_token="Bearer " + token.group(1))
    if self._locale is not None:
        self._config.speech_synthesis_language = self._locale
    if self._voice is not None:
        self._config.speech_synthesis_voice_name = self._voice
    if self._audio_format is not None:
        self._config.set_speech_synthesis_output_format(
            parse_format(self._audio_format))
    self._synthesizer = speechsdk.SpeechSynthesizer(
        self._config, self._output)

def _renew(self):
    """
    Refresh the token and synthesizer when the current token is too old.

    The threshold is 290 seconds since the last fetch
    (presumably just under the token lifetime -- TODO confirm).
    """
    # self._config is a SpeechConfig object by now, not the method, so the
    # refresh has to be invoked through _refresh_config.
    if time() - self._time > 290:
        self._refresh_config()

def pure_text_to_speech(self, text, **kwargs):
    """
    Synthesize plain text with the current synthesizer.

    :param text: The text to speak.
    :param kwargs: Accepted but not used by this method itself.
    :return: The value returned by the synthesizer's ``speak_text``.
    """
    # Renew first: it may replace self._synthesizer with a fresh one.
    self._renew()
    return self._synthesizer.speak_text(text)

def pure_text_to_speech_async(self, text, **kwargs):
    """
    Start plain-text synthesis through the synthesizer's async call.

    :param text: The text to speak.
    :param kwargs: Accepted but not used by this method itself.
    :return: The value returned by the synthesizer's ``speak_text_async``.
    """
    # The renewal check runs before the synthesizer is looked up, because
    # it may swap in a new synthesizer.
    self._renew()
    synthesizer = self._synthesizer
    return synthesizer.speak_text_async(text)

def ssml_to_speech(self, ssml, **kwargs):
    """
    Synthesize an SSML document with the current synthesizer.

    :param ssml: The SSML string to speak.
    :param kwargs: Accepted but not used by this method itself.
    :return: The value returned by the synthesizer's ``speak_ssml``.
    """
    # The renewal check runs before the synthesizer is looked up, because
    # it may swap in a new synthesizer.
    self._renew()
    synthesizer = self._synthesizer
    return synthesizer.speak_ssml(ssml)

def ssml_to_speech_async(self, ssml, **kwargs):
    """
    Start SSML synthesis through the synthesizer's async call.

    :param ssml: The SSML string to speak.
    :param kwargs: Accepted but not used by this method itself.
    :return: The value returned by the synthesizer's ``speak_ssml_async``.
    """
    # The renewal check runs before the synthesizer is looked up, because
    # it may swap in a new synthesizer.
    self._renew()
    synthesizer = self._synthesizer
    return synthesizer.speak_ssml_async(ssml)

def text_to_speech(self, text, **kwargs):
Expand All @@ -65,6 +96,7 @@ def text_to_speech(self, text, **kwargs):
role: The speaking role, optional. It only works for some Chinese voices.
path: Output file path. Only works with SpeechService classes that support it.
"""
self._renew()
ssml = create_ssml(text, *_parse_kwargs(**kwargs))
return self._synthesizer.speak_ssml(ssml)

Expand All @@ -79,6 +111,7 @@ def text_to_speech_async(self, text, **kwargs):
role: The speaking role, optional. It only works for some Chinese voices.
path: Output file path. Only works with SpeechService classes that support it.
"""
self._renew()
ssml = create_ssml(text, *_parse_kwargs(**kwargs))
return self._synthesizer.speak_ssml_async(ssml)

Expand All @@ -89,7 +122,8 @@ class SpeechToSpeakerService(SpeechServiceBase):
"""

def __init__(self, locale: str = None, voice: str = None,
audio_format: Union[AudioFormat, FileFormat, speechsdk.SpeechSynthesisOutputFormat, None] = None,
audio_format: Union[AudioFormat, FileFormat,
speechsdk.SpeechSynthesisOutputFormat, None] = None,
device_name: Union[str, None] = None):
"""
:param locale: The locale of the voice, optional.
Expand All @@ -98,7 +132,8 @@ def __init__(self, locale: str = None, voice: str = None,
:param device_name: Device name of the speaker, optional.
"""
if device_name is None:
output = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
output = speechsdk.audio.AudioOutputConfig(
use_default_speaker=True)
else:
output = speechsdk.audio.AudioOutputConfig(device_name=device_name)
super().__init__(locale, voice, audio_format, output)
Expand Down Expand Up @@ -127,16 +162,23 @@ def __init__(self, locale: Optional[str] = None, voice: Optional[str] = None,
"""
super().__init__(locale, voice, audio_format, None)

pure_text_to_speech = _setup_synthesizer_for_file(SpeechServiceBase.pure_text_to_speech)
pure_text_to_speech_async = _setup_synthesizer_for_file(SpeechServiceBase.pure_text_to_speech_async)
text_to_speech = _setup_synthesizer_for_file(SpeechServiceBase.text_to_speech)
text_to_speech_async = _setup_synthesizer_for_file(SpeechServiceBase.text_to_speech_async)
ssml_to_speech = _setup_synthesizer_for_file(SpeechServiceBase.ssml_to_speech)
ssml_to_speech_async = _setup_synthesizer_for_file(SpeechServiceBase.ssml_to_speech_async)
pure_text_to_speech = _setup_synthesizer_for_file(
SpeechServiceBase.pure_text_to_speech)
pure_text_to_speech_async = _setup_synthesizer_for_file(
SpeechServiceBase.pure_text_to_speech_async)
text_to_speech = _setup_synthesizer_for_file(
SpeechServiceBase.text_to_speech)
text_to_speech_async = _setup_synthesizer_for_file(
SpeechServiceBase.text_to_speech_async)
ssml_to_speech = _setup_synthesizer_for_file(
SpeechServiceBase.ssml_to_speech)
ssml_to_speech_async = _setup_synthesizer_for_file(
SpeechServiceBase.ssml_to_speech_async)

def _setup_synthesizer(self, file_path: str):
self._output = speechsdk.audio.AudioOutputConfig(filename=file_path)
self._synthesizer = speechsdk.SpeechSynthesizer(self._config, self._output)
self._synthesizer = speechsdk.SpeechSynthesizer(
self._config, self._output)


class SpeechToOneFileService(SpeechServiceBase):
Expand Down
86 changes: 0 additions & 86 deletions src/aspeak/api/functional.py

This file was deleted.

3 changes: 1 addition & 2 deletions src/aspeak/urls.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
ENDPOINT_URL = 'wss://eastus.api.speech.microsoft.com/cognitiveservices/websocket/v1'

GET_TOKEN="https://azure.microsoft.com/zh-cn/products/cognitive-services/speech-to-text/"

def voice_list_url() -> str:
    """Return the URL of the voices-list endpoint (fixed to the eastus host)."""
    # Plain literal: the string has no placeholders, so no f-prefix is needed.
    return 'https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list'