diff --git a/google-cloud-speech/samples/snippets/README.rst b/google-cloud-speech/samples/snippets/README.rst index ae1fe3c58a6f..9637ef4c7045 100644 --- a/google-cloud-speech/samples/snippets/README.rst +++ b/google-cloud-speech/samples/snippets/README.rst @@ -16,7 +16,7 @@ This directory contains samples for Google Cloud Speech API. The `Google Cloud S -.. _Google Cloud Speech API: https://cloud.google.com/speech/docs/ +.. _Google Cloud Speech API: https://cloud.google.com/speech/docs/ Setup ------------------------------------------------------------------------------- @@ -91,22 +91,21 @@ To run this sample: $ python transcribe.py usage: transcribe.py [-h] path - + Google Cloud Speech API sample application using the REST API for batch processing. - + Example usage: python transcribe.py resources/audio.raw python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac - + positional arguments: path File or GCS path for audio file to be recognized - + optional arguments: -h, --help show this help message and exit - Transcribe async +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -123,22 +122,21 @@ To run this sample: $ python transcribe_async.py usage: transcribe_async.py [-h] path - + Google Cloud Speech API sample application using the REST API for async batch processing. - + Example usage: python transcribe_async.py resources/audio.raw python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac - + positional arguments: path File or GCS path for audio file to be recognized - + optional arguments: -h, --help show this help message and exit - Transcribe with word time offsets +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -155,21 +153,20 @@ To run this sample: $ python transcribe_word_time_offsets.py usage: transcribe_word_time_offsets.py [-h] path - + Google Cloud Speech API sample that demonstrates word time offsets. - + Example usage: python transcribe_word_time_offsets.py resources/audio.raw python transcribe_word_time_offsets.py gs://cloud-samples-tests/speech/vr.flac - + positional arguments: path File or GCS path for audio file to be recognized - + optional arguments: -h, --help show this help message and exit - Transcribe Streaming +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -186,19 +183,50 @@ To run this sample: $ python transcribe_streaming.py usage: transcribe_streaming.py [-h] stream - + Google Cloud Speech API sample application using the streaming API. - + Example usage: python transcribe_streaming.py resources/audio.raw - + positional arguments: stream File to stream to the API - + optional arguments: -h, --help show this help message and exit +Beta Samples ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/beta_snippets.py;speech/cloud-client/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python beta_snippets.py + + usage: beta_snippets.py [-h] command path + + Google Cloud Speech API sample that demonstrates enhanced models + and recognition metadata. + + Example usage: + python beta_snippets.py enhanced-model resources/commercial_mono.wav + python beta_snippets.py metadata resources/commercial_mono.wav + + positional arguments: + command + path File for audio file to be recognized + + optional arguments: + -h, --help show this help message and exit + diff --git a/google-cloud-speech/samples/snippets/README.rst.in b/google-cloud-speech/samples/snippets/README.rst.in index ae84c9e824ad..18aa61f0cc6f 100644 --- a/google-cloud-speech/samples/snippets/README.rst.in +++ b/google-cloud-speech/samples/snippets/README.rst.in @@ -34,6 +34,9 @@ samples: - name: Transcribe Streaming file: transcribe_streaming.py show_help: true +- name: Beta Samples + file: beta_snippets.py + show_help: true cloud_client_library: true diff --git a/google-cloud-speech/samples/snippets/beta_snippets.py b/google-cloud-speech/samples/snippets/beta_snippets.py new file mode 100644 index 000000000000..12a7bbd2898f --- /dev/null +++ b/google-cloud-speech/samples/snippets/beta_snippets.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud Speech API sample that demonstrates enhanced models +and recognition metadata. + +Example usage: + python beta_snippets.py enhanced-model resources/commercial_mono.wav + python beta_snippets.py metadata resources/commercial_mono.wav +""" + +import argparse +import io + +from google.cloud import speech_v1p1beta1 as speech + + +# [START speech_transcribe_file_with_enhanced_model] +def transcribe_file_with_enhanced_model(path): + """Transcribe the given audio file using an enhanced model.""" + client = speech.SpeechClient() + + with io.open(path, 'rb') as audio_file: + content = audio_file.read() + + audio = speech.types.RecognitionAudio(content=content) + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code='en-US', + # Enhanced models are only available to projects that + # opt in for audio data collection. + use_enhanced=True, + # A model must be specified to use enhanced model. + model='phone_call') + + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + print('Transcript: {}'.format(alternative.transcript)) +# [END speech_transcribe_file_with_enhanced_model] + + +# [START speech_transcribe_file_with_metadata] +def transcribe_file_with_metadata(path): + """Send a request that includes recognition metadata.""" + client = speech.SpeechClient() + + with io.open(path, 'rb') as audio_file: + content = audio_file.read() + + # Here we construct a recognition metadata object. + # Most metadata fields are specified as enums that can be found + # in speech.enums.RecognitionMetadata + metadata = speech.types.RecognitionMetadata() + metadata.interaction_type = ( + speech.enums.RecognitionMetadata.InteractionType.DISCUSSION) + metadata.microphone_distance = ( + speech.enums.RecognitionMetadata.MicrophoneDistance.NEARFIELD) + metadata.recording_device_type = ( + speech.enums.RecognitionMetadata.RecordingDeviceType.SMARTPHONE) + # Some metadata fields are free form strings + metadata.recording_device_name = "Pixel 2 XL" + # And some are integers, for instance the 6 digit NAICS code + # https://www.naics.com/search/ + metadata.industry_naics_code_of_audio = 519190 + + audio = speech.types.RecognitionAudio(content=content) + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code='en-US', + # Add this in the request to send metadata. + metadata=metadata) + + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + print('Transcript: {}'.format(alternative.transcript)) +# [END speech_transcribe_file_with_metadata] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('command') + parser.add_argument( + 'path', help='File for audio file to be recognized') + + args = parser.parse_args() + + if args.command == 'enhanced-model': + transcribe_file_with_enhanced_model(args.path) + elif args.command == 'metadata': + transcribe_file_with_metadata(args.path) diff --git a/google-cloud-speech/samples/snippets/beta_snippets_test.py b/google-cloud-speech/samples/snippets/beta_snippets_test.py new file mode 100644 index 000000000000..ae578e2c4773 --- /dev/null +++ b/google-cloud-speech/samples/snippets/beta_snippets_test.py @@ -0,0 +1,35 @@ +# Copyright 2018, Google, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from beta_snippets import ( + transcribe_file_with_enhanced_model, transcribe_file_with_metadata) + +RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') + + +def test_transcribe_file_with_enhanced_model(capsys): + transcribe_file_with_enhanced_model( + os.path.join(RESOURCES, 'commercial_mono.wav')) + out, _ = capsys.readouterr() + + assert 'Chrome' in out + + +def test_transcribe_file_with_metadata(capsys): + transcribe_file_with_metadata( + os.path.join(RESOURCES, 'commercial_mono.wav')) + out, _ = capsys.readouterr() + + assert 'Chrome' in out diff --git a/google-cloud-speech/samples/snippets/requirements.txt b/google-cloud-speech/samples/snippets/requirements.txt index 2386b788dd5a..87b74e0d7334 100644 --- a/google-cloud-speech/samples/snippets/requirements.txt +++ b/google-cloud-speech/samples/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-speech==0.32.1 +google-cloud-speech==0.33.0 diff --git a/google-cloud-speech/samples/snippets/resources/commercial_mono.wav b/google-cloud-speech/samples/snippets/resources/commercial_mono.wav new file mode 100644 index 000000000000..e6b9ed434f9f Binary files /dev/null and b/google-cloud-speech/samples/snippets/resources/commercial_mono.wav differ