From a0d9c553d88b11c395e7a37cfaf0dbe901d50ed8 Mon Sep 17 00:00:00 2001
From: Yu-Han Liu
Date: Thu, 12 Apr 2018 10:45:57 -0700
Subject: [PATCH] add Speech API auto punctuation sample [(#1446)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1446)

* add auto punctuation sample
* correct docstring
---
 .../samples/snippets/beta_snippets.py      | 29 +++++++++++++++++++
 .../samples/snippets/beta_snippets_test.py | 11 ++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py b/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py
index 12a7bbd2898f..95a9d8405e96 100644
--- a/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py
+++ b/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py
@@ -20,6 +20,7 @@
 Example usage:
     python beta_snippets.py enhanced-model resources/commercial_mono.wav
     python beta_snippets.py metadata resources/commercial_mono.wav
+    python beta_snippets.py punctuation resources/commercial_mono.wav
 """
 
 import argparse
@@ -99,6 +100,32 @@ def transcribe_file_with_metadata(path):
 # [END speech_transcribe_file_with_metadata]
 
 
+# [START speech_transcribe_file_with_auto_punctuation]
+def transcribe_file_with_auto_punctuation(path):
+    """Transcribe the given audio file with auto punctuation enabled."""
+    client = speech.SpeechClient()
+
+    with io.open(path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech.types.RecognitionAudio(content=content)
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
+        language_code='en-US',
+        # Enable automatic punctuation
+        enable_automatic_punctuation=True)
+
+    response = client.recognize(config, audio)
+
+    for i, result in enumerate(response.results):
+        alternative = result.alternatives[0]
+        print('-' * 20)
+        print('First alternative of result {}'.format(i))
+        print('Transcript: {}'.format(alternative.transcript))
+# [END speech_transcribe_file_with_auto_punctuation]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
@@ -113,3 +140,5 @@ def transcribe_file_with_metadata(path):
         transcribe_file_with_enhanced_model(args.path)
     elif args.command == 'metadata':
         transcribe_file_with_metadata(args.path)
+    elif args.command == 'punctuation':
+        transcribe_file_with_auto_punctuation(args.path)
diff --git a/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py b/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py
index ae578e2c4773..a241a435d6eb 100644
--- a/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py
+++ b/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py
@@ -14,7 +14,8 @@
 import os
 
 from beta_snippets import (
-    transcribe_file_with_enhanced_model, transcribe_file_with_metadata)
+    transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
+    transcribe_file_with_metadata)
 
 RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
 
@@ -33,3 +34,11 @@ def test_transcribe_file_with_metadata(capsys):
     out, _ = capsys.readouterr()
 
     assert 'Chrome' in out
+
+
+def test_transcribe_file_with_auto_punctuation(capsys):
+    transcribe_file_with_auto_punctuation(
+        os.path.join(RESOURCES, 'commercial_mono.wav'))
+    out, _ = capsys.readouterr()
+
+    assert 'Okay. Sure.' in out