From 1df48e92d8f41f079fe1f0580c2338eb0b29e387 Mon Sep 17 00:00:00 2001 From: Noah Negrey Date: Mon, 26 Mar 2018 14:47:37 -0700 Subject: [PATCH] Add text-to-speech beta samples [(#1421)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1421) --- samples/README.rst | 167 ++++++++++++++++++++++++++++++++ samples/README.rst.in | 26 +++++ samples/list_voices.py | 56 +++++++++++ samples/list_voices_test.py | 23 +++++ samples/quickstart.py | 62 ++++++++++++ samples/requirements.txt | 1 + samples/resources/hello.ssml | 1 + samples/resources/hello.txt | 1 + samples/synthesize_file.py | 102 +++++++++++++++++++ samples/synthesize_file_test.py | 37 +++++++ samples/synthesize_text.py | 100 +++++++++++++++++++ samples/synthesize_text_test.py | 37 +++++++ 12 files changed, 613 insertions(+) create mode 100644 samples/README.rst create mode 100644 samples/README.rst.in create mode 100644 samples/list_voices.py create mode 100644 samples/list_voices_test.py create mode 100644 samples/quickstart.py create mode 100644 samples/requirements.txt create mode 100644 samples/resources/hello.ssml create mode 100644 samples/resources/hello.txt create mode 100644 samples/synthesize_file.py create mode 100644 samples/synthesize_file_test.py create mode 100644 samples/synthesize_text.py create mode 100644 samples/synthesize_text_test.py diff --git a/samples/README.rst b/samples/README.rst new file mode 100644 index 00000000..a917784c --- /dev/null +++ b/samples/README.rst @@ -0,0 +1,167 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Text-to-Speech API Python Samples +=============================================================================== + +.. 
image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=/README.rst + + +This directory contains samples for Google Cloud Text-to-Speech API. The `Google Cloud Text-to-Speech API`_ enables you to generate and customize synthesized speech from text or SSML. + + + + +.. _Google Cloud Text-to-Speech API: https://cloud.google.com/text-to-speech/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Samples +------------------------------------------------------------------------------- + +Quickstart ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. 
image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=/quickstart.py;/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python quickstart.py + + +List voices ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=/list_voices.py;/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python list_voices.py + + +Synthesize text ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=/synthesize_text.py;/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python synthesize_text.py + + usage: synthesize_text.py [-h] (--text TEXT | --ssml SSML) + + Google Cloud Text-To-Speech API sample application . + + Example usage: + python synthesize_text.py --text "hello" + python synthesize_text.py --ssml "<speak>Hello there.</speak>" + + optional arguments: + -h, --help show this help message and exit + --text TEXT The text from which to synthesize speech. + --ssml SSML The ssml string from which to synthesize speech. + + + +Synthesize file ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=/synthesize_file.py;/README.rst + + + + +To run this sample: + +.. 
code-block:: bash + + $ python synthesize_file.py + + usage: synthesize_file.py [-h] (--text TEXT | --ssml SSML) + + Google Cloud Text-To-Speech API sample application . + + Example usage: + python synthesize_file.py --text resources/hello.txt + python synthesize_file.py --ssml resources/hello.ssml + + optional arguments: + -h, --help show this help message and exit + --text TEXT The text file from which to synthesize speech. + --ssml SSML The ssml file from which to synthesize speech. + + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/samples/README.rst.in b/samples/README.rst.in new file mode 100644 index 00000000..e0cee295 --- /dev/null +++ b/samples/README.rst.in @@ -0,0 +1,26 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Text-to-Speech API + short_name: Cloud TTS API + url: https://cloud.google.com/text-to-speech/docs/ + description: > + The `Google Cloud Text-to-Speech API`_ enables you to generate and customize synthesized speech from text or SSML. 
+
+setup:
+- auth
+- install_deps
+
+samples:
+- name: Quickstart
+  file: quickstart.py
+- name: List voices
+  file: list_voices.py
+- name: Synthesize text
+  file: synthesize_text.py
+  show_help: True
+- name: Synthesize file
+  file: synthesize_file.py
+  show_help: True
+
+cloud_client_library: true
diff --git a/samples/list_voices.py b/samples/list_voices.py
new file mode 100644
index 00000000..3f43499c
--- /dev/null
+++ b/samples/list_voices.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application.
+
+Example usage:
+    python list_voices.py
+"""
+
+
+# [START tts_list_voices]
+def list_voices():
+    """Prints the name, languages, gender and sample rate of each voice."""
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    # Performs the list voices request
+    voices = client.list_voices()
+
+    # SsmlVoiceGender names indexed by enum value; built once for all voices
+    ssml_voice_genders = ['SSML_VOICE_GENDER_UNSPECIFIED', 'MALE',
+                          'FEMALE', 'NEUTRAL']
+
+    for voice in voices.voices:
+        # Display the voice's name. Example: tpc-vocoded
+        print('Name: {}'.format(voice.name))
+
+        # Display the supported language codes for this voice. Example: "en-US"
+        for language_code in voice.language_codes:
+            print('Supported language: {}'.format(language_code))
+
+        # Display the SSML Voice Gender
+        print('SSML Voice Gender: {}'.format(
+            ssml_voice_genders[voice.ssml_gender]))
+
+        # Display the natural sample rate hertz for this voice. Example: 24000
+        print('Natural Sample Rate Hertz: {}\n'.format(
+            voice.natural_sample_rate_hertz))
+# [END tts_list_voices]
+
+
+if __name__ == '__main__':
+    list_voices()
diff --git a/samples/list_voices_test.py b/samples/list_voices_test.py
new file mode 100644
index 00000000..fd325569
--- /dev/null
+++ b/samples/list_voices_test.py
@@ -0,0 +1,23 @@
+# Copyright 2018, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import list_voices
+
+
+def test_list_voices(capsys):
+    list_voices.list_voices()
+    out, err = capsys.readouterr()
+
+    assert 'en-US' in out
+    assert 'SSML Voice Gender: MALE' in out
+    assert 'SSML Voice Gender: FEMALE' in out
diff --git a/samples/quickstart.py b/samples/quickstart.py
new file mode 100644
index 00000000..f462139d
--- /dev/null
+++ b/samples/quickstart.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application .
+
+Example usage:
+    python quickstart.py
+"""
+
+
+def run_quickstart():
+    # [START tts_quickstart]
+    """Synthesizes the fixed text "Hello, World!" and saves it as MP3.
+
+    Takes no input: the request uses an en-US voice with neutral SSML
+    gender and the audio is written to 'output.mp3' in the working directory.
+    """
+    from google.cloud import texttospeech
+
+    # Instantiates a client
+    client = texttospeech.TextToSpeechClient()
+
+    # Set the text input to be synthesized
+    synthesis_input = texttospeech.types.SynthesisInput(text="Hello, World!")
+
+    # Build the voice request, select the language code ("en-US") and the ssml
+    # voice gender ("neutral")
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
+
+    # Select the type of audio file you want returned
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    # Perform the text-to-speech request on the text input with the selected
+    # voice parameters and audio file type
+    response = client.synthesize_speech(synthesis_input, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        # Write the response to the output file.
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+    # [END tts_quickstart]
+
+
+if __name__ == '__main__':
+    run_quickstart()
diff --git a/samples/requirements.txt b/samples/requirements.txt
new file mode 100644
index 00000000..4fd188de
--- /dev/null
+++ b/samples/requirements.txt
@@ -0,0 +1 @@
+google-cloud-texttospeech==0.1.0
diff --git a/samples/resources/hello.ssml b/samples/resources/hello.ssml
new file mode 100644
index 00000000..cd347b71
--- /dev/null
+++ b/samples/resources/hello.ssml
@@ -0,0 +1 @@
+<speak>Hello there.</speak>
\ No newline at end of file
diff --git a/samples/resources/hello.txt b/samples/resources/hello.txt
new file mode 100644
index 00000000..cd773cd1
--- /dev/null
+++ b/samples/resources/hello.txt
@@ -0,0 +1 @@
+Hello there!
\ No newline at end of file
diff --git a/samples/synthesize_file.py b/samples/synthesize_file.py
new file mode 100644
index 00000000..f62d6330
--- /dev/null
+++ b/samples/synthesize_file.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application .
+
+Example usage:
+    python synthesize_file.py --text resources/hello.txt
+    python synthesize_file.py --ssml resources/hello.ssml
+"""
+
+import argparse
+
+
+# [START tts_synthesize_text_file]
+def synthesize_text_file(text_file):
+    """Synthesizes the text read from text_file into 'output.mp3'."""
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    with open(text_file, 'r') as f:
+        text = f.read()
+        input_text = texttospeech.types.SynthesisInput(text=text)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_text_file]
+
+
+# [START tts_synthesize_ssml_file]
+def synthesize_ssml_file(ssml_file):
+    """Synthesizes the ssml read from ssml_file into 'output.mp3'.
+
+    Note: ssml must be well-formed according to:
+        https://www.w3.org/TR/speech-synthesis/
+    """
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    with open(ssml_file, 'r') as f:
+        ssml = f.read()
+        input_text = texttospeech.types.SynthesisInput(ssml=ssml)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_ssml_file]
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--text',
+                       help='The text file from which to synthesize speech.')
+    group.add_argument('--ssml',
+                       help='The ssml file from which to synthesize speech.')
+
+    args = parser.parse_args()
+    # Omitted options are None; an empty --text must not fall through to ssml
+    if args.text is not None:
+        synthesize_text_file(args.text)
+    else:
+        synthesize_ssml_file(args.ssml)
diff --git a/samples/synthesize_file_test.py b/samples/synthesize_file_test.py
new file mode 100644
index 00000000..2652009f
--- /dev/null
+++ b/samples/synthesize_file_test.py
@@ -0,0 +1,37 @@
+# Copyright 2018, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import synthesize_file
+
+TEXT_FILE = 'resources/hello.txt'
+SSML_FILE = 'resources/hello.ssml'
+
+
+def test_synthesize_text_file(capsys):
+    synthesize_file.synthesize_text_file(text_file=TEXT_FILE)
+    out, err = capsys.readouterr()
+
+    assert 'Audio content written to file' in out
+    statinfo = os.stat('output.mp3')
+    assert statinfo.st_size > 0
+
+
+def test_synthesize_ssml_file(capsys):
+    synthesize_file.synthesize_ssml_file(ssml_file=SSML_FILE)
+    out, err = capsys.readouterr()
+
+    assert 'Audio content written to file' in out
+    statinfo = os.stat('output.mp3')
+    assert statinfo.st_size > 0
diff --git a/samples/synthesize_text.py b/samples/synthesize_text.py
new file mode 100644
index 00000000..d5886bd1
--- /dev/null
+++ b/samples/synthesize_text.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Text-To-Speech API sample application .
+
+Example usage:
+    python synthesize_text.py --text "hello"
+    python synthesize_text.py --ssml "<speak>Hello there.</speak>"
+"""
+
+import argparse
+
+
+# [START tts_synthesize_text]
+def synthesize_text(text):
+    """Synthesizes the input string of text into 'output.mp3'."""
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    input_text = texttospeech.types.SynthesisInput(text=text)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_text]
+
+
+# [START tts_synthesize_ssml]
+def synthesize_ssml(ssml):
+    """Synthesizes the input string of ssml into 'output.mp3'.
+
+    Note: ssml must be well-formed according to:
+        https://www.w3.org/TR/speech-synthesis/
+
+    Example: <speak>Hello there.</speak>
+    """
+    from google.cloud import texttospeech
+    client = texttospeech.TextToSpeechClient()
+
+    input_text = texttospeech.types.SynthesisInput(ssml=ssml)
+
+    # Note: the voice can also be specified by name.
+    # Names of voices can be retrieved with client.list_voices().
+    voice = texttospeech.types.VoiceSelectionParams(
+        language_code='en-US',
+        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)
+
+    audio_config = texttospeech.types.AudioConfig(
+        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
+
+    response = client.synthesize_speech(input_text, voice, audio_config)
+
+    # The response's audio_content is binary.
+    with open('output.mp3', 'wb') as out:
+        out.write(response.audio_content)
+        print('Audio content written to file "output.mp3"')
+# [END tts_synthesize_ssml]
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--text',
+                       help='The text from which to synthesize speech.')
+    group.add_argument('--ssml',
+                       help='The ssml string from which to synthesize speech.')
+
+    args = parser.parse_args()
+    # Omitted options are None; an empty --text must not fall through to ssml
+    if args.text is not None:
+        synthesize_text(args.text)
+    else:
+        synthesize_ssml(args.ssml)
diff --git a/samples/synthesize_text_test.py b/samples/synthesize_text_test.py
new file mode 100644
index 00000000..948d58da
--- /dev/null
+++ b/samples/synthesize_text_test.py
@@ -0,0 +1,37 @@
+# Copyright 2018, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import synthesize_text
+
+TEXT = 'Hello there.'
+SSML = '<speak>Hello there.</speak>'
+
+
+def test_synthesize_text(capsys):
+    synthesize_text.synthesize_text(text=TEXT)
+    out, err = capsys.readouterr()
+
+    assert 'Audio content written to file' in out
+    statinfo = os.stat('output.mp3')
+    assert statinfo.st_size > 0
+
+
+def test_synthesize_ssml(capsys):
+    synthesize_text.synthesize_ssml(ssml=SSML)
+    out, err = capsys.readouterr()
+
+    assert 'Audio content written to file' in out
+    statinfo = os.stat('output.mp3')
+    assert statinfo.st_size > 0