Skip to content
This repository has been archived by the owner on Apr 20, 2024. It is now read-only.

Commit

Permalink
Speech gapic client library [(#1012)](GoogleCloudPlatform/python-docs-samples#1012)
Browse files Browse the repository at this point in the history

* Migrate quickstart to GAPIC client library

* Migrate transcribe to GAPIC client library

* Migrate transcribe_async to GAPIC client library

* Migrate transcribe_streaming to GAPIC client library

* clean up

* clean up

* Import from google.cloud.speech

* update transcribe samples

* import in alphabetic order

* remove unused variable

* use strings instead of enums

* restructure code

* comment on streaming requests

* import style

* flake

* correct indent

* migrate transcribe_streaming_mic to gapic

* update google-cloud-speech version requirement

* addressing review comments

* at the end of the audio stream, put None to signal to the generator

* flake

* addressing github review comments

* add region tags for migration guide

* update README

* rst format

* bullet

* addressing PR review comments

* use enums

* remove a word
  • Loading branch information
dizcology authored and busunkim96 committed Sep 3, 2020
1 parent 84e7d8c commit fea2f7a
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 98 deletions.
4 changes: 4 additions & 0 deletions samples/snippets/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Google Cloud Speech API Python Samples

This directory contains samples for Google Cloud Speech API. The `Google Cloud Speech API`_ enables easy integration of Google speech recognition technologies into developer applications. Send audio and receive a text transcription from the Cloud Speech API service.

- See the `migration guide`_ for information about migrating to Python client library v0.27.

.. _migration guide: https://cloud.google.com/speech/docs/python-client-migration




Expand Down
6 changes: 6 additions & 0 deletions samples/snippets/README.rst.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ product:
recognition technologies into developer applications. Send audio and receive
a text transcription from the Cloud Speech API service.


- See the `migration guide`_ for information about migrating to Python client library v0.27.


.. _migration guide: https://cloud.google.com/speech/docs/python-client-migration

setup:
- auth
- install_deps
Expand Down
22 changes: 15 additions & 7 deletions samples/snippets/quickstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,16 @@ def run_quickstart():
import os

# Imports the Google Cloud client library
# [START migration_import]
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
# [END migration_import]

# Instantiates a client
speech_client = speech.Client()
# [START migration_client]
client = speech.SpeechClient()
# [END migration_client]

# The name of the audio file to transcribe
file_name = os.path.join(
Expand All @@ -35,14 +41,16 @@ def run_quickstart():
# Loads the audio into memory
with io.open(file_name, 'rb') as audio_file:
content = audio_file.read()
sample = speech_client.sample(
content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(content=content)

config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')

# Detects speech in the audio file
alternatives = sample.recognize('en-US')
response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
Expand Down
2 changes: 1 addition & 1 deletion samples/snippets/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
google-cloud-speech==0.26.0
google-cloud-speech==0.27.0
45 changes: 31 additions & 14 deletions samples/snippets/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,50 @@
def transcribe_file(speech_file):
"""Transcribe the given audio file."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_sync_request]
# [START migration_audio_config_file]
with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
audio_sample = speech_client.sample(
content=content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)

alternatives = audio_sample.recognize('en-US')
audio = types.RecognitionAudio(content=content)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
# [END migration_audio_config_file]

# [START migration_sync_response]
response = client.recognize(config, audio)
# [END migration_sync_request]
alternatives = response.results[0].alternatives

for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
# [END migration_sync_response]


def transcribe_gcs(gcs_uri):
"""Transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_audio_config_gcs]
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=16000,
language_code='en-US')
# [END migration_audio_config_gcs]

audio_sample = speech_client.sample(
content=None,
source_uri=gcs_uri,
encoding='FLAC',
sample_rate_hertz=16000)
response = client.recognize(config, audio)
alternatives = response.results[0].alternatives

alternatives = audio_sample.recognize('en-US')
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))

Expand Down
54 changes: 30 additions & 24 deletions samples/snippets/transcribe_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,63 +30,69 @@
def transcribe_file(speech_file):
"""Transcribe the given audio file asynchronously."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_async_request]
with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
audio_sample = speech_client.sample(
content,
source_uri=None,
encoding='LINEAR16',
sample_rate_hertz=16000)

operation = audio_sample.long_running_recognize('en-US')
audio = types.RecognitionAudio(content=content)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')

# [START migration_async_response]
operation = client.long_running_recognize(config, audio)
# [END migration_async_request]

# Sleep and poll operation.done()
retry_count = 100
while retry_count > 0 and not operation.complete:
while retry_count > 0 and not operation.done():
retry_count -= 1
time.sleep(2)
operation.poll()

if not operation.complete:
if not operation.done():
print('Operation not complete and retry limit reached.')
return

alternatives = operation.results
alternatives = operation.result().results[0].alternatives
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
# [END send_request]
# [END migration_async_response]


def transcribe_gcs(gcs_uri):
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

audio_sample = speech_client.sample(
content=None,
source_uri=gcs_uri,
encoding='FLAC',
sample_rate_hertz=16000)
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=16000,
language_code='en-US')

operation = audio_sample.long_running_recognize('en-US')
operation = client.long_running_recognize(config, audio)

retry_count = 100
while retry_count > 0 and not operation.complete:
while retry_count > 0 and not operation.done():
retry_count -= 1
time.sleep(2)
operation.poll()

if not operation.complete:
if not operation.done():
print('Operation not complete and retry limit reached.')
return

alternatives = operation.results
alternatives = operation.result().results[0].alternatives
for alternative in alternatives:
print('Transcript: {}'.format(alternative.transcript))
print('Confidence: {}'.format(alternative.confidence))
# [END send_request_gcs]


if __name__ == '__main__':
Expand Down
43 changes: 31 additions & 12 deletions samples/snippets/transcribe_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,39 @@
def transcribe_streaming(stream_file):
"""Streams transcription of the given audio file."""
from google.cloud import speech
speech_client = speech.Client()
from google.cloud.speech import enums
from google.cloud.speech import types
client = speech.SpeechClient()

# [START migration_streaming_request]
with io.open(stream_file, 'rb') as audio_file:
audio_sample = speech_client.sample(
stream=audio_file,
encoding=speech.encoding.Encoding.LINEAR16,
sample_rate_hertz=16000)
alternatives = audio_sample.streaming_recognize('en-US')

for alternative in alternatives:
print('Finished: {}'.format(alternative.is_final))
print('Stability: {}'.format(alternative.stability))
print('Confidence: {}'.format(alternative.confidence))
print('Transcript: {}'.format(alternative.transcript))
content = audio_file.read()

# In practice, stream should be a generator yielding chunks of audio data.
stream = [content]
requests = (types.StreamingRecognizeRequest(audio_content=chunk)
for chunk in stream)

config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
streaming_config = types.StreamingRecognitionConfig(config=config)

# streaming_recognize returns a generator.
# [START migration_streaming_response]
responses = client.streaming_recognize(streaming_config, requests)
# [END migration_streaming_request]

for response in responses:
for result in response.results:
print('Finished: {}'.format(result.is_final))
print('Stability: {}'.format(result.stability))
alternatives = result.alternatives
for alternative in alternatives:
print('Confidence: {}'.format(alternative.confidence))
print('Transcript: {}'.format(alternative.transcript))
# [END migration_streaming_response]


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit fea2f7a

Please sign in to comment.