Merge pull request #2724 from daspecster/move-speech-methods-to-sample
Move speech methods to sample.
daspecster authored Nov 14, 2016
2 parents 1157488 + 8743fe3 commit dfc967d
Showing 5 changed files with 229 additions and 249 deletions.
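
For orientation, here is a hedged sketch of the API shape this commit moves toward: the recognition entry points that previously lived on the client now live on the Sample returned by client.sample(), which keeps a reference back to its client. The bucket URI is illustrative.

    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(source_uri='gs://my-bucket/recording.flac',
                           encoding=speech.Encoding.FLAC,
                           sample_rate=44100)

    # Before this commit: the client owned the recognize calls.
    # alternatives = client.sync_recognize(sample, max_alternatives=2)

    # After this commit: the sample dispatches the call itself.
    alternatives = sample.sync_recognize(max_alternatives=2)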
17 changes: 7 additions & 10 deletions docs/speech-usage.rst
@@ -61,7 +61,7 @@ See: `Speech Asynchronous Recognize`_
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=speech.Encoding.LINEAR16,
... sample_rate=44100)
>>> operation = client.async_recognize(sample, max_alternatives=2)
>>> operation = sample.async_recognize(max_alternatives=2)
>>> retry_count = 100
>>> while retry_count > 0 and not operation.complete:
... retry_count -= 1
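As a hedged sketch of how the polling loop above completes under the new sample-level API (the poll() call and the results attribute are assumed from the surrounding docs, which this hunk truncates):

    >>> import time
    >>> operation = sample.async_recognize(max_alternatives=2)
    >>> retry_count = 100
    >>> while retry_count > 0 and not operation.complete:
    ...     retry_count -= 1
    ...     time.sleep(5)
    ...     operation.poll()  # assumed: refresh status from the API
    >>> if operation.complete:
    ...     for alternative in operation.results:  # assumed attribute
    ...         print(alternative.transcript)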
@@ -94,8 +94,7 @@ Great Britain.
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=speech.Encoding.FLAC,
... sample_rate=44100)
>>> operation = client.async_recognize(sample, max_alternatives=2)
>>> alternatives = client.sync_recognize(
>>> alternatives = sample.sync_recognize(
... speech.Encoding.FLAC, 16000,
... source_uri='gs://my-bucket/recording.flac', language_code='en-GB',
... max_alternatives=2)
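The resulting example still re-passes the encoding, sample rate, and URI alongside the new call. A hedged sketch of the more compact sample-level form, assuming sample.sync_recognize accepts the same language_code keyword the removed client method did:

    >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
    ...                        encoding=speech.Encoding.FLAC,
    ...                        sample_rate=44100)
    >>> alternatives = sample.sync_recognize(language_code='en-GB',
    ...                                      max_alternatives=2)
    >>> for alternative in alternatives:
    ...     print(alternative.transcript)  # assumed attribute of Alternative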
@@ -119,7 +118,7 @@ Example of using the profanity filter.
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=speech.Encoding.FLAC,
... sample_rate=44100)
>>> alternatives = client.sync_recognize(sample, max_alternatives=1,
>>> alternatives = sample.sync_recognize(max_alternatives=1,
... profanity_filter=True)
>>> for alternative in alternatives:
... print('=' * 20)
@@ -141,7 +140,7 @@ words to the vocabulary of the recognizer.
... encoding=speech.Encoding.FLAC,
... sample_rate=44100)
>>> hints = ['hi', 'good afternoon']
>>> alternatives = client.sync_recognize(sample, max_alternatives=2,
>>> alternatives = sample.sync_recognize(max_alternatives=2,
... speech_context=hints)
>>> for alternative in alternatives:
... print('=' * 20)
@@ -171,7 +170,7 @@ speech data to possible text alternatives on the fly.
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... results = list(client.streaming_recognize(sample))
... results = list(sample.streaming_recognize())
>>> print(results[0].alternatives[0].transcript)
'hello'
>>> print(results[0].alternatives[0].confidence)
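Since the removed Client.streaming_recognize further down in client.py is a generator that yields results as they arrive, the sample-level call presumably is too. A hedged sketch of consuming it lazily instead of materializing the whole list (attribute access mirrors the example above):

    >>> with open('./hello.wav', 'rb') as stream:
    ...     sample = client.sample(content=stream,
    ...                            encoding=speech.Encoding.LINEAR16,
    ...                            sample_rate=16000)
    ...     for result in sample.streaming_recognize():
    ...         print(result.alternatives[0].transcript)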
@@ -194,8 +193,7 @@ See: `Single Utterance`_
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... responses = client.streaming_recognize(sample,
... single_utterance=True)
... responses = sample.streaming_recognize(single_utterance=True)
... results = list(responses)
>>> print(results[0].alternatives[0].transcript)
hello
@@ -214,8 +212,7 @@ If ``interim_results`` is set to :data:`True`, interim results
... sample = client.sample(content=stream,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
... for results in client.streaming_recognize(sample,
... interim_results=True):
... for results in sample.streaming_recognize(interim_results=True):
... print('=' * 20)
... print(results[0].alternatives[0].transcript)
... print(results[0].alternatives[0].confidence)
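The removed client code in client.py below only yields a result when result.is_final is set or interim results were requested, so a caller separating interim from final hypotheses presumably checks that flag on each yielded item. A hedged sketch, assuming the yielded wrapper exposes is_final as the docstring's flag suggests:

    >>> with open('./hello.wav', 'rb') as stream:
    ...     sample = client.sample(content=stream,
    ...                            encoding=speech.Encoding.LINEAR16,
    ...                            sample_rate=16000)
    ...     for results in sample.streaming_recognize(interim_results=True):
    ...         label = 'final' if results[0].is_final else 'interim'  # assumed attribute
    ...         print(label, results[0].alternatives[0].transcript)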
196 changes: 2 additions & 194 deletions speech/google/cloud/speech/client.py
@@ -25,9 +25,7 @@
from google.cloud.speech._gax import GAPICSpeechAPI
from google.cloud.speech.alternative import Alternative
from google.cloud.speech.connection import Connection
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.operation import Operation
from google.cloud.speech.result import StreamingSpeechResult
from google.cloud.speech.sample import Sample


@@ -65,58 +63,7 @@ def __init__(self, credentials=None, http=None, use_gax=None):
_connection_class = Connection
_speech_api = None

def async_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
speech_context=None):
"""Asychronous Recognize request to Google Speech API.
.. _async_recognize: https://cloud.google.com/speech/reference/\
rest/v1beta1/speech/asyncrecognize
See `async_recognize`_.
:type sample: :class:`~google.cloud.speech.sample.Sample`
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
hypotheses to be returned. The server may
return fewer than maxAlternatives.
Valid values are 0-30. A value of 0 or 1
will return a maximum of 1. Defaults to 1
:type profanity_filter: bool
:param profanity_filter: If True, the server will attempt to filter
out profanities, replacing all but the
initial character in each filtered word with
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
and phrases. This can also be used to add new
words to the vocabulary of the recognizer.
:rtype: :class:`~google.cloud.speech.operation.Operation`
:returns: Operation for asynchronous request to Google Speech API.
"""
if sample.encoding is not Encoding.LINEAR16:
raise ValueError('Only LINEAR16 encoding is supported by '
'asynchronous speech requests.')
api = self.speech_api
return api.async_recognize(sample, language_code, max_alternatives,
profanity_filter, speech_context)

@staticmethod
def sample(content=None, source_uri=None, encoding=None,
def sample(self, content=None, source_uri=None, encoding=None,
sample_rate=None):
"""Factory: construct Sample to use when making recognize requests.
@@ -148,7 +95,7 @@ def sample(content=None, source_uri=None, encoding=None,
:returns: Instance of ``Sample``.
"""
return Sample(content=content, source_uri=source_uri,
encoding=encoding, sample_rate=sample_rate)
encoding=encoding, sample_rate=sample_rate, client=self)
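
This factory change is what makes the move possible: the sample now remembers the client that created it. A hedged sketch of the delegation this implies inside sample.py, which is presumably among the changed files not expanded in this view (the speech_api attribute and the argument order are assumptions taken from the removed client methods below):

    class Sample(object):
        """Sketch: an audio sample that dispatches recognition via its client."""

        def __init__(self, content=None, source_uri=None, encoding=None,
                     sample_rate=None, client=None):
            self._client = client
            self.content = content
            self.source_uri = source_uri
            self.encoding = encoding
            self.sample_rate = sample_rate

        def sync_recognize(self, language_code=None, max_alternatives=None,
                           profanity_filter=None, speech_context=None):
            # Delegate to the API object owned by the client that built this sample.
            api = self._client.speech_api
            return api.sync_recognize(self, language_code, max_alternatives,
                                      profanity_filter, speech_context)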

@property
def speech_api(self):
@@ -160,145 +107,6 @@ def speech_api(self):
self._speech_api = _JSONSpeechAPI(self)
return self._speech_api

def streaming_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
speech_context=None, single_utterance=False,
interim_results=False):
"""Streaming speech recognition.
.. note::
Streaming recognition requests are limited to 1 minute of audio.
See: https://cloud.google.com/speech/limits#content
Yields: Instance of
:class:`~google.cloud.speech.result.StreamingSpeechResult`
containing results and metadata from the streaming request.
:type sample: :class:`~google.cloud.speech.sample.Sample`
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
hypotheses to be returned. The server may
return fewer than maxAlternatives.
Valid values are 0-30. A value of 0 or 1
will return a maximum of 1. Defaults to 1
:type profanity_filter: bool
:param profanity_filter: If True, the server will attempt to filter
out profanities, replacing all but the
initial character in each filtered word with
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
and phrases. This can also be used to add new
words to the vocabulary of the recognizer.
:type single_utterance: bool
:param single_utterance: (Optional) If false or omitted, the recognizer
will perform continuous recognition
(continuing to process audio even if the user
pauses speaking) until the client closes the
output stream (gRPC API) or when the maximum
time limit has been reached. Multiple
SpeechRecognitionResults with the is_final
flag set to true may be returned.
If true, the recognizer will detect a single
spoken utterance. When it detects that the
user has paused or stopped speaking, it will
return an END_OF_UTTERANCE event and cease
recognition. It will return no more than one
SpeechRecognitionResult with the is_final flag
set to true.
:type interim_results: bool
:param interim_results: (Optional) If true, interim results (tentative
hypotheses) may be returned as they become
available (these interim results are indicated
with the ``is_final=False`` flag). If false or
omitted, only is_final=true result(s) are
returned.
:raises: EnvironmentError if gRPC is not available.
"""
if not self._use_gax:
raise EnvironmentError('gRPC is required to use this API.')

responses = self.speech_api.streaming_recognize(sample, language_code,
max_alternatives,
profanity_filter,
speech_context,
single_utterance,
interim_results)
for response in responses:
for result in response.results:
if result.is_final or interim_results:
yield StreamingSpeechResult.from_pb(result)

def sync_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
speech_context=None):
"""Synchronous Speech Recognition.
.. _sync_recognize: https://cloud.google.com/speech/reference/\
rest/v1beta1/speech/syncrecognize
See `sync_recognize`_.
:type sample: :class:`~google.cloud.speech.sample.Sample`
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
hypotheses to be returned. The server may
return fewer than maxAlternatives.
Valid values are 0-30. A value of 0 or 1
will return a maximum of 1. Defaults to 1
:type profanity_filter: bool
:param profanity_filter: If True, the server will attempt to filter
out profanities, replacing all but the
initial character in each filtered word with
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
and phrases. This can also be used to add new
words to the vocabulary of the recognizer.
:rtype: list
:returns: A list of dictionaries. One dict for each alternative. Each
dictionary typically contains two keys (though not
all will be present in all cases)
* ``transcript``: The detected text from the audio recording.
* ``confidence``: The confidence in language detection, float
between 0 and 1.
"""
api = self.speech_api
return api.sync_recognize(sample, language_code, max_alternatives,
profanity_filter, speech_context)


class _JSONSpeechAPI(object):
"""Speech API for interacting with the JSON/REST version of the API.