kxxt · kxxt · Nov 8, 2022 · Nov 6, 2022 · Nov 6, 2022 · Nov 7, 2022
diff --git a/src/aspeak/api/__init__.py b/src/aspeak/api/__init__.py
@@ -1,4 +1,3 @@
 from .format import FileFormat, AudioFormat
-from .functional import text_to_speech, pure_text_to_speech, ssml_to_speech
 from .provider import SpeechServiceProvider
 from .api import SpeechToFileService, SpeechToSpeakerService, SpeechServiceBase, SpeechToOneFileService
diff --git a/src/aspeak/api/api.py b/src/aspeak/api/api.py
@@ -3,7 +3,10 @@
 
 from .format import parse_format, AudioFormat, FileFormat
 from ..ssml import create_ssml
-from ..urls import ENDPOINT_URL
+from ..urls import GET_TOKEN
+from requests import get
+from re import search
+from time import time
 import azure.cognitiveservices.speech as speechsdk
 
 
@@ -23,7 +26,8 @@ class SpeechServiceBase:
     """
 
     def __init__(self, locale: Optional[str] = None, voice: Optional[str] = None,
-                 audio_format: Union[AudioFormat, FileFormat, speechsdk.SpeechSynthesisOutputFormat, None] = None,
+                 audio_format: Union[AudioFormat, FileFormat,
+                                     speechsdk.SpeechSynthesisOutputFormat, None] = None,
                  output: speechsdk.audio.AudioOutputConfig = None
                  ):
         """
@@ -32,26 +36,53 @@ def __init__(self, locale: Optional[str] = None, voice: Optional[str] = None,
         :param output: An instance of AudioOutputConfig.
         :param audio_format: The audio format, optional.
         """
-        self._config = speechsdk.SpeechConfig(endpoint=ENDPOINT_URL)
+        self._locale = locale
+        self._voice = voice
+        self._audio_format = audio_format
         self._output = output
-        if locale is not None:
-            self._config.speech_synthesis_language = locale
-        if voice is not None:
-            self._config.speech_synthesis_voice_name = voice
-        if audio_format is not None:
-            self._config.set_speech_synthesis_output_format(parse_format(audio_format))
-        self._synthesizer = speechsdk.SpeechSynthesizer(self._config, self._output)
+        self._config()
+
+    def _config(self):
+        response = get(GET_TOKEN)
+        response.raise_for_status()
+        html = response.text
+        token = search(r'token: "(.+)"', html)
+        region = search(r'region: "(.+)"', html)
+        assert token is not None
+        assert region is not None
+        self._time = time()
+        self._config = speechsdk.SpeechConfig(
+            region=region.group(1), auth_token="Bearer "+token.group(1))
+        if self._locale is not None:
+            self._config.speech_synthesis_language = self._locale
+        if self._voice is not None:
+            self._config.speech_synthesis_voice_name = self._voice
+        if self._audio_format is not None:
+            self._config.set_speech_synthesis_output_format(
+                parse_format(self._audio_format))
+        self._synthesizer = speechsdk.SpeechSynthesizer(
+            self._config, self._output)
+
+    def _renew(self):
+        now = time()
+        if now-self._time > 290:
+            self._config()
 
     def pure_text_to_speech(self, text, **kwargs):
+        self._renew()
+        print("TTS")
         return self._synthesizer.speak_text(text)
 
     def pure_text_to_speech_async(self, text, **kwargs):
+        self._renew()
         return self._synthesizer.speak_text_async(text)
 
     def ssml_to_speech(self, ssml, **kwargs):
+        self._renew()
         return self._synthesizer.speak_ssml(ssml)
 
     def ssml_to_speech_async(self, ssml, **kwargs):
+        self._renew()
         return self._synthesizer.speak_ssml_async(ssml)
 
     def text_to_speech(self, text, **kwargs):
@@ -65,6 +96,7 @@ def text_to_speech(self, text, **kwargs):
         role: The speaking role, optional. It only works for some Chinese voices.
         path: Output file path. Only works with SpeechService classes that support it.
         """
+        self._renew()
         ssml = create_ssml(text, *_parse_kwargs(**kwargs))
         return self._synthesizer.speak_ssml(ssml)
 
@@ -79,6 +111,7 @@ def text_to_speech_async(self, text, **kwargs):
         role: The speaking role, optional. It only works for some Chinese voices.
         path: Output file path. Only works with SpeechService classes that support it.
         """
+        self._renew()
         ssml = create_ssml(text, *_parse_kwargs(**kwargs))
         return self._synthesizer.speak_ssml_async(ssml)
 
@@ -89,7 +122,8 @@ class SpeechToSpeakerService(SpeechServiceBase):
     """
 
     def __init__(self, locale: str = None, voice: str = None,
-                 audio_format: Union[AudioFormat, FileFormat, speechsdk.SpeechSynthesisOutputFormat, None] = None,
+                 audio_format: Union[AudioFormat, FileFormat,
+                                     speechsdk.SpeechSynthesisOutputFormat, None] = None,
                  device_name: Union[str, None] = None):
         """
         :param locale: The locale of the voice, optional.
@@ -98,7 +132,8 @@ def __init__(self, locale: str = None, voice: str = None,
         :param device_name: Device name of the speaker, optional.
         """
         if device_name is None:
-            output = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
+            output = speechsdk.audio.AudioOutputConfig(
+                use_default_speaker=True)
         else:
             output = speechsdk.audio.AudioOutputConfig(device_name=device_name)
         super().__init__(locale, voice, audio_format, output)
@@ -127,16 +162,23 @@ def __init__(self, locale: Optional[str] = None, voice: Optional[str] = None,
         """
         super().__init__(locale, voice, audio_format, None)
 
-    pure_text_to_speech = _setup_synthesizer_for_file(SpeechServiceBase.pure_text_to_speech)
-    pure_text_to_speech_async = _setup_synthesizer_for_file(SpeechServiceBase.pure_text_to_speech_async)
-    text_to_speech = _setup_synthesizer_for_file(SpeechServiceBase.text_to_speech)
-    text_to_speech_async = _setup_synthesizer_for_file(SpeechServiceBase.text_to_speech_async)
-    ssml_to_speech = _setup_synthesizer_for_file(SpeechServiceBase.ssml_to_speech)
-    ssml_to_speech_async = _setup_synthesizer_for_file(SpeechServiceBase.ssml_to_speech_async)
+    pure_text_to_speech = _setup_synthesizer_for_file(
+        SpeechServiceBase.pure_text_to_speech)
+    pure_text_to_speech_async = _setup_synthesizer_for_file(
+        SpeechServiceBase.pure_text_to_speech_async)
+    text_to_speech = _setup_synthesizer_for_file(
+        SpeechServiceBase.text_to_speech)
+    text_to_speech_async = _setup_synthesizer_for_file(
+        SpeechServiceBase.text_to_speech_async)
+    ssml_to_speech = _setup_synthesizer_for_file(
+        SpeechServiceBase.ssml_to_speech)
+    ssml_to_speech_async = _setup_synthesizer_for_file(
+        SpeechServiceBase.ssml_to_speech_async)
 
     def _setup_synthesizer(self, file_path: str):
         self._output = speechsdk.audio.AudioOutputConfig(filename=file_path)
-        self._synthesizer = speechsdk.SpeechSynthesizer(self._config, self._output)
+        self._synthesizer = speechsdk.SpeechSynthesizer(
+            self._config, self._output)
 
 
 class SpeechToOneFileService(SpeechServiceBase):

diff --git a/src/aspeak/api/functional.py b/src/aspeak/api/functional.py
diff --git a/src/aspeak/urls.py b/src/aspeak/urls.py
@@ -1,5 +1,4 @@
-ENDPOINT_URL = 'wss://eastus.api.speech.microsoft.com/cognitiveservices/websocket/v1'
-
+GET_TOKEN="https://azure.microsoft.com/zh-cn/products/cognitive-services/speech-to-text/"
 
 def voice_list_url() -> str:
     return f'https://eastus.api.speech.microsoft.com/cognitiveservices/voices/list'